Pupil size light reflex to a light test is a potential test to determine a person is under the influence of THC and be able to be use as a field sobriety measures. The goals of the analysis to:
Pupil size light reflex to light does not habituated to THC use (smoking).
Time for smoking to post test maybe an important variable to model b/c:
There is a circadian pattern to pupil size (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7445830/).
Currently this field does not use FDA, so any FDA in topic area may be publishable
Data on pupil size during light reflex test. Pupil size was extracted at the image level based on image analysis techniques. Each test was performed simultaneously on right and left eye before and after THC use (smoking).
ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo_20220926.csv"))
ps$demo_gender <- factor(ps$demo_gender,
levels = c(1,2),
labels = c("Male", "Female"))
ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2
ps$user_cat <- factor(ps$user_cat,
levels = c(0,1,2),
labels = c("non-user",
"occasional",
"daily"))
ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1
ps$tp <- factor(ps$tp,
levels = c(0,1),
labels = c("pre2", "post"))
# str(ps)
## -- Not needed with Ben's revised file missing frame numbers recorded
# #impute frame number
# for(i in 2:(nrow(ps)-1)){
# if(is.na(ps$frame[i]) & is.na(ps$frame[i+1] - ps$frame[i-1])){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) == 2){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) > 2){
# if(abs(ps$percent_change[i] - ps$percent_change[i-1]) <
# abs(ps$percent_change[i+1] - ps$percent_change[i])){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(ps$frame[i] <- ps$frame[i+1]-1)
# }
# )
# )
# }
# total number of subjects
length(unique(ps$subject_id))
## [1] 101
# subject level data
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "thc")])
# # Demographics Table by User Category
# table1(~ demo_age + demo_weight + demo_height + demo_gender|user_cat,
# data = pt.df[pt.df$tp == "pre2", ])
# number of subjects by timepoint (pre/post)
table(pt.df$tp)
##
## pre2 post
## 99 95
There are more subjects in total than the by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.
Add scalar features for each participant-time point
scalar.feat <- read.csv(file.path(data_folder, ps_folder, "scalars_trim.csv"),
header = T, stringsAsFactors = F)
scalar.feat$subject_id <- substr(scalar.feat$timeid, 1, 7)
scalar.feat$tp <- substr(scalar.feat$timeid, 8, 11)
scalar.featR <- scalar.feat[scalar.feat$eye == "Right", ]
pt.df <- merge(pt.df, scalar.featR[, c("subject_id", "tp", "min_constriction",
"duration", "avg_end_percent", "slope",
"AUC", "eye")],
by = c("subject_id", "tp"))
Reshape participant demog data to preserve pre and post THC levels and scalar features
pt.df.w <- reshape(pt.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "eye"),
direction = "wide")
pt.df.w$thc.post[pt.df.w$user_cat == "non-user" & is.na(pt.df.w$thc.post)] <- 0
pt.df.w$thc_chg <- pt.df.w$thc.post - pt.df.w$thc.pre2
pt.df.w$BMI <- pt.df.w$demo_weight*703/(pt.df.w$demo_height)^2
pt.df.w$min_constriction_chg <- pt.df.w$min_constriction.post - pt.df.w$min_constriction.pre2
pt.df.w$AUC_chg <- pt.df.w$AUC.post - pt.df.w$AUC.pre2
pt.df.w$duration_chg <- pt.df.w$duration.post - pt.df.w$duration.pre2
pt.df.w$avg_end_percent_chg <- pt.df.w$avg_end_percent.post - pt.df.w$avg_end_percent.pre2
pt.df.w$slope_chg <- pt.df.w$slope.post - pt.df.w$slope.pre2
## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"),
sheet = "Raw")
testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date, origin = "1899-12-30")
testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$pre_safetyscan_time_hr, ":", testTimes$pre_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_start_time_hr, ":", testTimes$consump_start_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_end_time_hr, ":", testTimes$consump_end_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$post_safetyscan_time_hr, ":", testTimes$post_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$consump_end_time_convert,
units = "mins"))
testTimes$Post_PreTime <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$pre_safetyscan_time_convert,
units = "mins"))
testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]
# ## For subjects without a post Consumption to test time, we're using the time between pre and post test -- DO NOT "impute" this way -- JW 11/11/2022
# testTimes$postConsumpTimeToTest[is.na(testTimes$postConsumpTimeToTest)] <- testTimes$Post_PreTime[is.na(testTimes$postConsumpTimeToTest)]
pt.df <- merge(pt.df.w, testTimes[, c("subject_id", "postConsumpTimeToTest", "Post_PreTime")],
by = "subject_id")
by(pt.df$postConsumpTimeToTest, INDICES = list(pt.df$user_cat), summary)
## : non-user
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## NA NA NA NaN NA NA 32
## ------------------------------------------------------------
## : occasional
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 54.00 60.50 64.00 64.19 67.00 84.00
## ------------------------------------------------------------
## : daily
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 53.00 58.00 60.00 61.15 65.00 74.00
plot(pt.df$Post_PreTime, pt.df$postConsumpTimeToTest)
ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest", "Post_PreTime")],
by = "subject_id")
non_user.id <- pt.df$subject_id[pt.df$user_cat == "non-user"]
# View(testTimes[testTimes$subject_id %in% non_user.id, ])
Add VAS data
vs <- read_sas(file.path(data_folder, ps_folder, "urban102.sas7bdat"))
vs <- vs[, c("subject_id", "period", "vas")]
attr(vs$subject_id, "label") <- NULL
attr(vs$subject_id, "format.sas") <- NULL
vas.post <- vs[vs$period == "POST", ]
vas.post$period <- NULL
names(vas.post)[names(vas.post) == "vas"] <- "vas.post"
vas.pre <- vs[vs$period == "PRE", ]
vas.pre$period <- NULL
names(vas.pre)[names(vas.pre) == "vas"] <- "vas.pre"
vas.w <- merge(vas.pre, vas.post, by = "subject_id")
vas.w$diff <- vas.w$vas.post - vas.w$vas.pre
summary(vas.w$diff)
## no difference between pre and post vas
vas.w <- vas.w[, c("subject_id", "vas.pre")]
names(vas.w)[names(vas.w) == "vas.pre"] <- "vas"
pt.df <- merge(pt.df, vas.w,
by = "subject_id",
all.x = T)
ps <- merge(ps, vas.w,
by = "subject_id",
all.x = T)
vs <- read.xlsx(file.path(data_folder, ps_folder,
"All CDPHE VAS Scores_30Nov2022.xlsx"),
sheet = "VAS")
### Data Dictionary
# "subject_id"
# "vas0_high_score" -- vas score for how high you feel at pre
# "vas0_score_dc" -- how confident you feel about driving at pre
# "vas1_high_score" -- vas score for how high you feel at post
# "vas1_score_dc" -- how confident you feel about driving at post
pt.df <- merge (pt.df, vs,
by = "subject_id",
all.x = T)
pt.df$vas_high_score_chg <- pt.df$vas1_high_score - pt.df$vas0_high_score
pt.df$vas_score_dc_chg <- pt.df$vas1_score_dc - pt.df$vas0_score_dc
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~ demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest + Post_PreTime + min_constriction_chg + AUC_chg + duration_chg + slope_chg + avg_end_percent_chg + vas1_high_score + vas_high_score_chg + vas1_score_dc + vas_score_dc_chg|user_cat,
data = pt.df)
| non-user (N=32) |
occasional (N=36) |
daily (N=33) |
Overall (N=101) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.9 (4.90) | 31.6 (4.98) | 33.5 (5.69) | 32.6 (5.21) |
| Median [Min, Max] | 33.5 [25.7, 42.2] | 30.1 [25.1, 41.9] | 32.6 [25.4, 45.3] | 32.3 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 171 (48.5) | 173 (33.4) | 176 (33.1) | 173 (38.4) |
| Median [Min, Max] | 165 [85.0, 284] | 170 [105, 240] | 175 [113, 250] | 170 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.83) | 69.6 (3.60) | 68.1 (3.52) | 68.6 (4.04) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [59.0, 76.0] | 69.0 [59.0, 78.0] |
| BMI | ||||
| Mean (SD) | 25.5 (4.89) | 25.0 (4.02) | 26.7 (4.42) | 25.7 (4.46) |
| Median [Min, Max] | 24.5 [16.6, 34.2] | 24.5 [16.9, 32.6] | 25.8 [18.9, 34.9] | 25.1 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (40.6%) | 23 (63.9%) | 18 (54.5%) | 54 (53.5%) |
| Female | 19 (59.4%) | 13 (36.1%) | 15 (45.5%) | 47 (46.5%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.20 (9.71) | 29.9 (31.9) | 11.7 (21.9) |
| Median [Min, Max] | 0 [0, 0] | 5.73 [0, 46.6] | 17.8 [1.32, 124] | 3.93 [0, 124] |
| Missing | 2 (6.3%) | 7 (19.4%) | 8 (24.2%) | 17 (16.8%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 64.2 (6.52) | 61.2 (4.87) | 62.7 (5.95) |
| Median [Min, Max] | NA [NA, NA] | 64.0 [54.0, 84.0] | 60.0 [53.0, 74.0] | 62.0 [53.0, 84.0] |
| Missing | 32 (100%) | 0 (0%) | 0 (0%) | 32 (31.7%) |
| Post_PreTime | ||||
| Mean (SD) | 111 (10.8) | 116 (9.20) | 116 (7.93) | 114 (9.54) |
| Median [Min, Max] | 109 [99.0, 148] | 113 [104, 147] | 117 [98.0, 137] | 112 [98.0, 148] |
| min_constriction_chg | ||||
| Mean (SD) | 6.22 (8.36) | 13.3 (10.5) | 11.4 (9.49) | 10.3 (9.87) |
| Median [Min, Max] | 7.35 [-14.6, 19.6] | 14.1 [-15.1, 34.4] | 9.92 [-1.72, 35.8] | 11.1 [-15.1, 35.8] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| AUC_chg | ||||
| Mean (SD) | 3.06 (9.51) | 6.63 (7.82) | 7.67 (7.78) | 5.71 (8.56) |
| Median [Min, Max] | 4.21 [-21.6, 21.2] | 5.36 [-13.5, 23.5] | 5.46 [-1.82, 36.0] | 5.36 [-21.6, 36.0] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| duration_chg | ||||
| Mean (SD) | -6.48 (58.0) | -16.4 (78.2) | -3.96 (42.9) | -9.29 (61.9) |
| Median [Min, Max] | -10.0 [-145, 152] | -8.00 [-219, 198] | -2.00 [-105, 108] | -7.00 [-219, 198] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| slope_chg | ||||
| Mean (SD) | -0.0239 (0.0348) | -0.0398 (0.0384) | -0.0323 (0.0523) | -0.0321 (0.0420) |
| Median [Min, Max] | -0.0224 [-0.0959, 0.0727] | -0.0321 [-0.136, 0.0321] | -0.0259 [-0.176, 0.0372] | -0.0290 [-0.176, 0.0727] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| avg_end_percent_chg | ||||
| Mean (SD) | 0.687 (9.11) | 0.836 (5.84) | 4.54 (9.08) | 1.89 (8.17) |
| Median [Min, Max] | 1.02 [-26.8, 20.7] | 0.627 [-14.6, 13.7] | 2.70 [-5.32, 42.3] | 1.30 [-26.8, 42.3] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| vas1_high_score | ||||
| Mean (SD) | 0 (0) | 46.4 (17.4) | 47.0 (16.1) | 31.9 (25.8) |
| Median [Min, Max] | 0 [0, 0] | 48.5 [4.00, 79.0] | 46.0 [13.0, 81.0] | 37.0 [0, 81.0] |
| vas_high_score_chg | ||||
| Mean (SD) | 0 (0) | 45.6 (17.2) | 46.5 (16.4) | 31.4 (25.6) |
| Median [Min, Max] | 0 [0, 0] | 47.5 [4.00, 79.0] | 45.0 [13.0, 80.0] | 35.0 [0, 80.0] |
| vas1_score_dc | ||||
| Mean (SD) | 99.4 (1.85) | 42.5 (32.3) | 62.9 (32.5) | 67.2 (35.5) |
| Median [Min, Max] | 100 [90.0, 100] | 31.0 [0, 97.0] | 73.0 [0, 100] | 78.0 [0, 100] |
| vas_score_dc_chg | ||||
| Mean (SD) | 0.0781 (1.22) | -56.9 (32.4) | -36.7 (32.3) | -32.3 (35.5) |
| Median [Min, Max] | 0 [-3.00, 5.00] | -67.5 [-100, -3.00] | -27.0 [-100, 1.00] | -22.0 [-100, 5.00] |
I’m looking for any sharp declines over 10 frames after the 150th frame and then visualizing those trajectories to determine if there might be misalignment of the light test start frame.
ps$lagPctChg <- c(rep(0, 10), diff(ps$percent_change, lag = 10))
ps$lagID <- dplyr::lag(ps$subject_id, 10)
ps$lagtime <- dplyr::lag(ps$time, 10)
ps$lagEye <- dplyr::lag(ps$eye, 10)
ps$lagPctChg <- ifelse(ps$subject_id == ps$lagID & ps$time == ps$lagtime & ps$eye == ps$lagEye,
ps$lagPctChg, 0)
summary(ps$lagPctChg[ps$frame > 150])
hist(ps$lagPctChg[ps$frame > 150], breaks = 1000)
ps.150 <- ps[ps$frame > 150 & ps$eye == "Left", ]
pot.misaligned <- unique(ps.150[ps.150$lagPctChg <= -15, c("subject_id", "time", "user_type", "eye")])
pot.misaligned <- pot.misaligned[!(is.na(pot.misaligned$subject_id)), ]
pot.misaligned$misaligned <- 1
misaligned <- merge(ps, pot.misaligned,
by= c("subject_id", "time", "user_type", "eye"),
all.y = T)
mis.right <- misaligned[misaligned$eye == "Left", ]
misAlign.id <- unique(misaligned$subject_id)
for(i in misAlign.id){
print(ggplot(mis.right[mis.right$subject_id == i, ],
aes(x=frame, y = percent_change,
group = subject_id, color = i))+
geom_line()+
facet_grid(rows = vars(subject_id), cols = vars(tp))+
labs(title = paste0("Potential MisAligned Subject: ", i))+
theme_bw())
}
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_109.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-109", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_060.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-060", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
### New alignment view
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_007.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-007", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: start at 2nd bump?")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_033.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-033", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_038.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-038", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_043.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-043", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Both look odd, but mainly check pre; maybe review video")+
# theme_bw()
# dev.off()
Remove Outliers
## Remove known outliers
ps$outliers <- 0
# ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1 # Ben revised
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1
ps <- ps[ps$outliers == 0, ]
Plot of Pupil Size and Percent Change for “PRE” data by Eye and User Category
pre.df <- ps[ps$tp == "pre2", ]
ggplot(pre.df, aes(x=frame/30, y = pupil_size, group = subject_id))+
geom_line(alpha = 0.5, show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category",
x = "seconds")+
theme_bw()
ggplot(pre.df, aes(x=frame/30, y = percent_change, group = subject_id))+
geom_line(alpha = 0.5, show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category",
x = "seconds")+
theme_bw()
Plot of PRE/POST for Right Eye
right.df <- ps[ps$eye == "Right", ]
ggplot(right.df, aes(x=frame/30, y = pupil_size, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title ="Pupil Size by Pre/Post and THC use category",
x = "seconds")+
theme_bw()
# jpeg(file.path(res_folder, "PrePost_Right_PercentChange.jpg"),
# height = 6, width = 8, res = 300, units = "in")
ggplot(right.df, aes(x=frame/30, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title = "Percent Change by Pre/Post and THC use category",
x = "seconds")+
theme_bw()
# dev.off()
Plots of Left and Right Eye for “POST” data. One pt has negative values for pupil size.
post.df <- ps[ps$tp == "post", ]
ggplot(post.df, aes(x=frame/30, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category - Post",
x = "seconds")+
theme_bw()
pre.df <- ps[ps$tp == "pre2", ]
# jpeg(file.path(res_folder, "LeftRight_Pre_PercentChange.jpg"),
# height = 6, width = 8, res = 300, units = "in")
ggplot(pre.df, aes(x=frame/30, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category - Pre",
x = "seconds")+
theme_bw()
# dev.off()
Plot the Mean and +/- 1 SD functions for the percent change in pupil light reflex data for the pre and post data by THC user category. (Added code to input frame numbers when NA).
right_0.df <- right.df[right.df$frame >= 0, c("subject_id", "tp", "user_cat",
"frame", "percent_change")]
right_0.df <- right_0.df[order(right_0.df$subject_id, right_0.df$tp, right_0.df$frame), ]
right_0.w <- reshape(right_0.df,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
rownames(right_0.w) <- paste0(right_0.w$subject_id, "_", right_0.w$tp)
pct_chg <- names(right_0.w[, 4:602])
mean_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
mean_fxn.l <- reshape(mean_fxn,
varying = pct_chg,
v.names = "percent_change",
timevar = "frame",
times = pct_chg,
direction = "long")
mean_fxn.l$frame <- as.numeric(gsub("percent_change.", "", mean_fxn.l$frame))
rownames(mean_fxn.l) <- NULL
mean_fxn.l$id <- NULL
names(mean_fxn.l)[names(mean_fxn.l) == "Group.1"] <- "tp"
names(mean_fxn.l)[names(mean_fxn.l) == "Group.2"] <- "user_cat"
mean_fxn.l$grp <- paste0(mean_fxn.l$tp, "_", mean_fxn.l$user_cat)
ggplot(mean_fxn.l, aes(x=frame/30, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category",
x = "seconds")+
theme_bw()
## Warning: Removed 449 row(s) containing missing values (geom_path).
sd_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
NA’s are when there’s no data in for that time point and user category. Min frame value for NA is 480.
Truncating to frame 400
right_0.fpca <- fpca.face(Y = as.matrix(right_0.w[, 4:404]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0.fpca.pve <- round(right_0.fpca$evalues/sum(right_0.fpca$evalues)*100, 2)
# right_0.fpca.pve
mu <- right_0.fpca$mu
right_sd <- sqrt(right_0.fpca$evalues)
right_Phi <- right_0.fpca$efunctions
df_plot <- data.frame(seconds = seq(0, 400, by = 1)/30,
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5]
)
ylim_max = max(df_plot$mu) + max(df_plot [, c("errband1", "errband2","errband3","errband4","errband5")], na.rm = T)
ylim_min = min(df_plot$mu) - max(df_plot [, c("errband1", "errband2","errband3","errband4","errband5")], na.rm = T)
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
plot_pc1 <- ggplot(data = df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband1, color = "-2SD"))+
labs(x = "seconds",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0.fpca.pve[1]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(ylim_min, ylim_max))+
theme_bw()
plot_pc2 <- ggplot(data = df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband2, color = "-2SD"))+
labs(x = "seconds",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0.fpca.pve[2]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(ylim_min, ylim_max))+
theme_bw()
plot_pc3 <- ggplot(data = df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband3, color = "-2SD"))+
labs(x = "seconds",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0.fpca.pve[3]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(ylim_min, ylim_max))+
theme_bw()
plot_pc4 <- ggplot(data = df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband4, color = "-2SD"))+
labs(x = "seconds",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0.fpca.pve[4]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(ylim_min, ylim_max))+
theme_bw()
plot_pc5 <- ggplot(data = df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband5, color = "-2SD"))+
labs(x = "seconds",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0.fpca.pve[5]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(ylim_min, ylim_max))+
theme_bw()
legend_postPC <- g_legend(plot_pc1)
blank <- grid.rect(gp=gpar(col="white"))
grid.arrange(plot_pc1+theme(legend.position = "none"),
plot_pc2+theme(legend.position = "none"),
plot_pc3+theme(legend.position = "none"),
plot_pc4+theme(legend.position = "none"),
plot_pc5+theme(legend.position = "none"),
blank,
legend_postPC,
layout_matrix = rbind(c(1,2,3,7), c(4,5, 6 , 7)),
widths = c(10, 10, 10, 3),
nrow = 2, ncol = 4)
# jpeg(file.path(res_folder, "Post_PC4.jpg"),
# height = 6, width = 8, res = 300, units = "in")
grid.arrange(plot_pc1+theme(legend.position = "none"),
plot_pc2+theme(legend.position = "none"),
plot_pc3+theme(legend.position = "none"),
plot_pc4+theme(legend.position = "none"),
legend_postPC,
layout_matrix = rbind(c(1,2,5), c(3,4,5)),
widths = c(10, 10, 3),
nrow = 2, ncol = 3)
# dev.off()
PC1 plot: Individuals with low loading on PC1 (-2SD) have less constriction than the average and individuals with a high loading (+2SD) have more constriction. Rebound effect?
PC2 plot: Overall shape of trajectory & pattern of recovery
Plot individuals with high/low PC2 overall scores.
right_scores <- right_0.fpca$scores
q.95 <- quantile(right_scores[, 2], p = 0.95)
highPC2 <- rownames(right_scores)[right_scores[, 2] > q.95]
highPC2.df <- data.frame(subject_id = substr(highPC2, 1, 7),
tp = substr(highPC2, 9,12))
for(i in 1:nrow(highPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == highPC2.df$subject_id[i] & right_0.df$tp == highPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame30, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for high PC2 score --", "Subject:",
highPC2.df$subject_id[i], "Timepoint:", highPC2.df$tp[i]),
x = "seconds")+
theme_bw())
}
q.05 <- quantile(right_scores[, 2], p = 0.05)
lowPC2 <- rownames(right_scores)[right_scores[, 2] < q.05]
lowPC2.df <- data.frame(subject_id = substr(lowPC2, 1, 7),
tp = substr(lowPC2, 9,12))
for(i in 1:nrow(lowPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == lowPC2.df$subject_id[i] & right_0.df$tp == lowPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame/30, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for low PC2 score --", "Subject:", lowPC2.df$subject_id[i], "Timepoint:", lowPC2.df$tp[i]),
x = "seconds")+
theme_bw())
}
Make sure datasets have the same participants and are truncated at frame 400
ps <- ps[ps$frame >= 0 & ps$frame <= 400,
c("subject_id", "tp", "eye", "frame", "percent_change")]
ps <- ps[order(ps$subject_id, ps$tp, ps$eye, ps$frame), ]
ps$frame_char <- str_pad(ps$frame, width = 3, side = c("left"), pad = "0")
ps$frame <- NULL
ps.w <- reshape(ps[, c("subject_id","tp", "eye", "frame_char", "percent_change")],
timevar = "frame_char",
idvar = c("subject_id", "tp", "eye"),
direction = "wide")
var.names <- c(names(ps.w)[1:3], sort(names(ps.w)[4:404]))
ps.w <- ps.w[, var.names]
id.names <- paste(ps.w$subject_id, ps.w$tp, ps.w$eye, sep = "_")
rownames(ps.w) <- id.names
missData <- apply(ps.w[, 4:404], MARGIN = 1, FUN = function(x) sum(is.na(x)))
missInterpolate <- function(x){
z <- zoo(x, order.by = seq(0:400))
y <- na.approx(z, maxgap = 3, na.rm = FALSE)
return(as.numeric(y))
}
test <- apply(ps.w[, 4:404],
MARGIN = 1,
FUN = missInterpolate)
ps.w <- data.frame(t(test))
names(ps.w) <- var.names[4:404]
ps.w$subject_id <- substr(rownames(ps.w), 1, 7)
ps.w$tp <- ifelse(grepl("pre2", rownames(ps.w)) == T, "pre2", "post")
ps.w$eye <- ifelse(grepl("Left", rownames(ps.w)) == T, "Left", "Right")
ps.w <- ps.w[, var.names]
pctChg.names <- var.names[4:404]
ps <- reshape(ps.w,
varying = pctChg.names,
v.names = "percent_change",
timevar = "frame",
times = 0:400,
idvar = c("subject_id", "tp", "eye"),
direction = "long")
ps <- ps[order(ps$subject_id, ps$tp, ps$eye), ]
right_0.post <- ps[ps$tp == "post" & ps$eye == "Right", ]
post.ids <- unique(right_0.post$subject_id)
right_0.post.w <- ps.w[ps.w$tp == "post" & ps.w$eye == "Right", ]
right_0.pt <- reshape(ps[ps$eye == "Right", ],
timevar = "tp",
idvar = c("subject_id", "frame"),
direction = "wide")
right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2
right_0.pt <- right_0.pt[, c("subject_id", "frame", "wiPctChg")]
right_0.pt.w <- reshape(right_0.pt[, c("subject_id", "frame", "wiPctChg")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 2:402]))
rowsAllMissing <- names(allMissing)[allMissing == 401]
right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]
################################# ----------------------------- Left eye
left_0.post <- ps[ps$tp == "post" & ps$eye == "Left", ]
left.post.ids <- unique(left_0.post$subject_id)
left_0.post.w <- ps.w[ps.w$tp == "post" & ps.w$eye == "Left", ]
left_0.pt <- reshape(ps[ps$eye == "Left", ],
timevar = "tp",
idvar = c("subject_id", "frame"),
direction = "wide")
left_0.pt$wiPctChg <- left_0.pt$percent_change.post - left_0.pt$percent_change.pre2
left_0.pt <- left_0.pt[, c("subject_id", "frame", "wiPctChg")]
left_0.pt.w <- reshape(left_0.pt[, c("subject_id", "frame", "wiPctChg")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(left_0.pt.w[, 2:402]))
rowsAllMissing <- names(allMissing)[allMissing == 401]
left_0.pt.w <- left_0.pt.w[!(rownames(left_0.pt.w) %in% rowsAllMissing), ]
## Find intersection of all participant files across data sets to define the analytic subjects
analytic.N <- intersect(unique(right_0.post$subject_id), unique(right_0.pt$subject_id))
analytic.N <- intersect(analytic.N, unique(right_0.post.w$subject_id))
analytic.N <- intersect(analytic.N, unique(right_0.pt.w$subject_id))
analytic_L.N <- intersect(unique(left_0.post.w$subject_id), unique(left_0.post$subject_id))
analytic_L.N <- intersect(analytic.N, unique(left_0.pt$subject_id))
analytic_L.N <- intersect(analytic.N, unique(left_0.pt.w$subject_id))
analytic_LR.N <- intersect(analytic.N, analytic_L.N)
## Filter all datasets to analytic sample
left_0.post <- left_0.post[left_0.post$subject_id %in% analytic_LR.N, ]
left_0.post$seconds <- left_0.post$frame/30
left_0.post.w <- left_0.post.w[left_0.post.w$subject_id %in% analytic_LR.N ,]
row.names(left_0.post.w) <- left_0.post.w$subject_id
left_0.post <- merge(left_0.post, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
left_0.post.w <- merge(left_0.post.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(left_0.post.w) <- left_0.post.w$subject_id
left_0.pt <- left_0.pt[left_0.pt$subject_id %in% analytic_LR.N, ]
left_0.pt$seconds <- left_0.pt$frame/30
left_0.pt.w <- left_0.pt.w[left_0.pt.w$subject_id %in% analytic_LR.N, ]
row.names(left_0.pt.w) <- left_0.pt.w$subject_id
left_0.pt <- merge(left_0.pt, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
left_0.pt.w <- merge(left_0.pt.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(left_0.pt.w) <- left_0.pt.w$subject_id
## Filter all datasets to analytic sample
right_0.post <- right_0.post[right_0.post$subject_id %in% analytic.N, ]
right_0.post$seconds <- right_0.post$frame/30
right_0.post.w <- right_0.post.w[right_0.post.w$subject_id %in% analytic.N ,]
row.names(right_0.post.w) <- right_0.post.w$subject_id
right_0.post <- merge(right_0.post, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
right_0.post.w <- merge(right_0.post.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(right_0.post.w) <- right_0.post.w$subject_id
right_0.pt <- right_0.pt[right_0.pt$subject_id %in% analytic.N, ]
right_0.pt$seconds <- right_0.pt$frame/30
right_0.pt.w <- right_0.pt.w[right_0.pt.w$subject_id %in% analytic.N, ]
right_0.pt <- merge(right_0.pt, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
right_0.pt.w <- merge(right_0.pt.w, pt.df[, c("subject_id", "user_cat")],
by = "subject_id",
all.x = T)
rownames(right_0.pt.w) <- right_0.pt.w$subject_id
Send Merged dataset to Dr. Wrobel (20221128)
right_0.post <- merge(right_0.post, pt.df,
by = "subject_id")
right_0.pt <- merge(right_0.pt, pt.df,
by = "subject_id")
saveRDS(right_0.post, file.path(data_folder, ps_folder,
"PupilLightReflex_POST_rightEye_20221208.rds"))
saveRDS(right_0.pt, file.path(data_folder, ps_folder,
"PupilLightReflex_WPD_rightEye_20221208.rds"))
saveRDS(pt.df[pt.df$subject_id %in% analytic.N,], file.path(data_folder, ps_folder,
"PupilLightReflex_PtData_20221208.rds"))
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest + Post_PreTime + min_constriction_chg + AUC_chg + duration_chg + slope_chg + avg_end_percent_chg +vas1_high_score + vas_high_score_chg + vas1_score_dc + vas_score_dc_chg|user_cat,
data = pt.df[pt.df$subject_id %in% analytic.N, ])
| non-user (N=29) |
occasional (N=30) |
daily (N=25) |
Overall (N=84) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.3 (4.70) | 31.1 (4.75) | 32.8 (5.71) | 32.0 (5.02) |
| Median [Min, Max] | 31.1 [25.7, 42.2] | 29.7 [25.1, 41.9] | 32.0 [25.4, 45.3] | 31.1 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 167 (48.8) | 169 (34.0) | 180 (29.8) | 172 (38.7) |
| Median [Min, Max] | 162 [85.0, 284] | 165 [105, 240] | 175 [125, 250] | 169 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.97) | 69.5 (3.84) | 68.5 (3.40) | 68.7 (4.16) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [63.0, 76.0] | 69.0 [60.0, 78.0] |
| BMI | ||||
| Mean (SD) | 24.9 (4.72) | 24.5 (3.96) | 27.1 (4.26) | 25.4 (4.41) |
| Median [Min, Max] | 23.9 [16.6, 34.1] | 24.3 [16.9, 32.5] | 26.8 [19.0, 34.9] | 24.7 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (44.8%) | 20 (66.7%) | 16 (64.0%) | 49 (58.3%) |
| Female | 16 (55.2%) | 10 (33.3%) | 9 (36.0%) | 35 (41.7%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.20 (9.71) | 29.9 (31.9) | 11.9 (22.0) |
| Median [Min, Max] | 0 [0, 0] | 5.73 [0, 46.6] | 17.8 [1.32, 124] | 3.96 [0, 124] |
| Missing | 0 (0%) | 1 (3.3%) | 0 (0%) | 1 (1.2%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 63.9 (6.26) | 60.2 (3.78) | 62.2 (5.57) |
| Median [Min, Max] | NA [NA, NA] | 63.5 [54.0, 84.0] | 60.0 [53.0, 67.0] | 62.0 [53.0, 84.0] |
| Missing | 29 (100%) | 0 (0%) | 0 (0%) | 29 (34.5%) |
| Post_PreTime | ||||
| Mean (SD) | 110 (8.90) | 116 (9.72) | 116 (6.58) | 114 (8.97) |
| Median [Min, Max] | 109 [99.0, 131] | 114 [104, 147] | 117 [106, 137] | 112 [99.0, 147] |
| min_constriction_chg | ||||
| Mean (SD) | 6.22 (8.36) | 13.3 (10.5) | 11.4 (9.49) | 10.3 (9.87) |
| Median [Min, Max] | 7.35 [-14.6, 19.6] | 14.1 [-15.1, 34.4] | 9.92 [-1.72, 35.8] | 11.1 [-15.1, 35.8] |
| AUC_chg | ||||
| Mean (SD) | 3.06 (9.51) | 6.63 (7.82) | 7.67 (7.78) | 5.71 (8.56) |
| Median [Min, Max] | 4.21 [-21.6, 21.2] | 5.36 [-13.5, 23.5] | 5.46 [-1.82, 36.0] | 5.36 [-21.6, 36.0] |
| duration_chg | ||||
| Mean (SD) | -6.48 (58.0) | -16.4 (78.2) | -3.96 (42.9) | -9.29 (61.9) |
| Median [Min, Max] | -10.0 [-145, 152] | -8.00 [-219, 198] | -2.00 [-105, 108] | -7.00 [-219, 198] |
| slope_chg | ||||
| Mean (SD) | -0.0239 (0.0348) | -0.0398 (0.0384) | -0.0323 (0.0523) | -0.0321 (0.0420) |
| Median [Min, Max] | -0.0224 [-0.0959, 0.0727] | -0.0321 [-0.136, 0.0321] | -0.0259 [-0.176, 0.0372] | -0.0290 [-0.176, 0.0727] |
| avg_end_percent_chg | ||||
| Mean (SD) | 0.687 (9.11) | 0.836 (5.84) | 4.54 (9.08) | 1.89 (8.17) |
| Median [Min, Max] | 1.02 [-26.8, 20.7] | 0.627 [-14.6, 13.7] | 2.70 [-5.32, 42.3] | 1.30 [-26.8, 42.3] |
| vas1_high_score | ||||
| Mean (SD) | 0 (0) | 48.2 (17.8) | 47.2 (14.7) | 31.3 (26.4) |
| Median [Min, Max] | 0 [0, 0] | 51.3 [4.00, 79.0] | 46.0 [13.0, 78.0] | 37.5 [0, 79.0] |
| vas_high_score_chg | ||||
| Mean (SD) | 0 (0) | 47.6 (17.5) | 47.0 (14.7) | 31.0 (26.1) |
| Median [Min, Max] | 0 [0, 0] | 50.0 [4.00, 79.0] | 45.0 [13.0, 77.0] | 37.5 [0, 79.0] |
| vas1_score_dc | ||||
| Mean (SD) | 99.4 (1.93) | 43.7 (32.3) | 62.0 (31.9) | 68.4 (35.1) |
| Median [Min, Max] | 100 [90.0, 100] | 34.5 [0, 97.0] | 73.0 [0, 100] | 79.0 [0, 100] |
| vas_score_dc_chg | ||||
| Mean (SD) | -0.0517 (1.17) | -55.6 (32.4) | -37.6 (31.7) | -31.1 (35.0) |
| Median [Min, Max] | 0 [-3.00, 5.00] | -65.5 [-100, -3.00] | -27.0 [-100, 1.00] | -21.0 [-100, 5.00] |
post.user <- ggplot(right_0.post, aes(x=seconds, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat))+
labs(title ="POST data percent change by THC use category")+
theme_bw()
Calculate the difference (post-pre) within subjects and plot by THC user category
wi.user <- ggplot(right_0.pt, aes(x=seconds, y = wiPctChg, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat))+
labs(title ="Within subject difference in percent change by THC use category")+
theme_bw()
# jpeg(file.path(res_folder, "Post_WPD_Trajectories.jpg"),
# height = 5, width = 12, res = 300, units = "in")
grid.arrange(post.user, wi.user, nrow = 1)
# dev.off()
Non-user plot seems “centered” around 0 but still showing heterogeneity. Lots of heterogeneity across user-groups, but a mostly positive difference.
mean_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 2:402],
list(right_0.pt.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[2:402]
mean_fxn.pt.l <- reshape(mean_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
mean_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", mean_fxn.pt.l$frame))
rownames(mean_fxn.pt.l) <- NULL
mean_fxn.pt.l$id <- NULL
names(mean_fxn.pt.l)[names(mean_fxn.pt.l) == "Group.1"] <- "user_cat"
ggplot(mean_fxn.pt.l, aes(x=frame/30, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
labs(title ="Average Within subject difference in percent change by THC use category",
x = "seconds")+
theme_bw()
Difference in trajectories (post-pre), show a distinct difference between non-user and both user groups (up to frame 200), but the differences might not be significant. Harder to distinguish between the occasional and daily user trajectories.
# Check to make sure there are significant numbers in each user category
table(right_0.pt.w$user_cat)
##
## non-user occasional daily
## 29 30 25
sd_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 2:402],
list(right_0.pt.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[2:402]
sd_fxn.pt.l <- reshape(sd_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
sd_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", sd_fxn.pt.l$frame))
rownames(sd_fxn.pt.l) <- NULL
sd_fxn.pt.l$id <- NULL
names(sd_fxn.pt.l)[names(sd_fxn.pt.l) == "Group.1"] <- "user_cat"
sd_fxn.pt.l$wi_percent_change_neg <- -1*sd_fxn.pt.l$wi_percent_change
ggplot(sd_fxn.pt.l, aes(x=frame/30, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
geom_line(aes(x=frame/30, y = wi_percent_change_neg, group = user_cat, color = user_cat),
linetype = "dashed")+
labs(title ="+/- 1 SD Within subject difference in percent change by THC use category",
x = "seconds")+
theme_bw()
## number of missing data points in each column
missingnessCol <- apply(right_0.pt.w[, 2:402],
MARGIN = 2,
FUN = function(x) sum(is.na(x)))
sum(missingnessCol == 0) ## 140 columns without any missing data
## [1] 223
sum(missingnessCol == 84) ## 109 columns with all missing data
## [1] 0
sum(missingnessCol <= 68) ## 438 columns where at least 80% of participants have data
## [1] 401
hist(missingnessCol, breaks = 30)
missing.df <- data.frame(frame = seq(0, 400, by =1),
missingness = round(missingnessCol/nrow(right_0.pt.w)*100, 2),
stringsAsFactors = F)
ggplot(missing.df, aes(x=frame/30, y= missingness))+
geom_line()
Truncate within person difference data to frame 400 where missingness
reaches 50%.
Try to visualize missing data in within person data frame
wi_pct_chg <- names(right_0.pt.w)[2:402]
right_0.pt.l <- reshape(right_0.pt.w,
varying = wi_pct_chg,
v.names = "wi_pct_chg_diff",
timevar = 'frame',
times = wi_pct_chg,
direction = "long")
right_0.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", right_0.pt.l$frame))
rownames(right_0.pt.l) <- NULL
right_0.pt.l$id <- NULL
right_0.pt.l$notMissing <- ifelse(is.na(right_0.pt.l$wi_pct_chg_diff), 0, 1)
ggplot(right_0.pt.l, aes(x = as.factor(frame), y = subject_id, fill = as.factor(notMissing)))+
geom_raster(alpha = 0.8)+
scale_fill_manual(values = c("black", 'white'),
labels = c("Missing", "Present"))+
scale_x_discrete(breaks = seq(0, 400, by = 25))+
geom_vline(xintercept = 350,color = "darkred")+
geom_vline(xintercept = 375,color = "darkblue")+
theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust = 1))
right_0_wi.fpca <- fpca.face(Y = as.matrix(right_0.pt.w[, 2:402]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0_wi.fpca.pve <- round(right_0_wi.fpca$evalues/sum(right_0_wi.fpca$evalues)*100, 2)
mu <- right_0_wi.fpca$mu
right_sd <- sqrt(right_0_wi.fpca$evalues)
right_Phi <- right_0_wi.fpca$efunctions
wi.df_plot <- data.frame(seconds = seq(0, 400, by = 1)/30,
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5],
errband6 = 2*right_Phi[, 6]*right_sd[6])
wi.ylim_max = max(wi.df_plot$mu) + max(wi.df_plot [, c("errband1", "errband2","errband3","errband4","errband5", "errband6")], na.rm = T)
wi.ylim_min = min(wi.df_plot$mu) - max(wi.df_plot [, c("errband1", "errband2","errband3","errband4","errband5", "errband6")], na.rm = T)
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
wi.plot_pc1 <- ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0_wi.fpca.pve[1]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(wi.ylim_min, wi.ylim_max))+
theme_bw()
wi.plot_pc2 <- ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0_wi.fpca.pve[2]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(wi.ylim_min, wi.ylim_max))+
theme_bw()
wi.plot_pc3 <- ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0_wi.fpca.pve[3]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(wi.ylim_min, wi.ylim_max))+
theme_bw()
wi.plot_pc4 <- ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0_wi.fpca.pve[4]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(wi.ylim_min, wi.ylim_max))+
theme_bw()
wi.plot_pc5 <- ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0_wi.fpca.pve[5]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(wi.ylim_min, wi.ylim_max))+
theme_bw()
wi.plot_pc6 <- ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband6, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband6, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC6:", "% Var Explained:", right_0_wi.fpca.pve[6]))+
scale_color_manual(values = colors)+
scale_y_continuous(limits = c(wi.ylim_min, wi.ylim_max))+
theme_bw()
legend_wiPC <- g_legend(wi.plot_pc1)
grid.arrange(wi.plot_pc1+theme(legend.position = "none"),
wi.plot_pc2+theme(legend.position = "none"),
wi.plot_pc3+theme(legend.position = "none"),
wi.plot_pc4+theme(legend.position = "none"),
wi.plot_pc5+theme(legend.position = "none"),
wi.plot_pc6+theme(legend.position = "none"),
legend_wiPC,
layout_matrix = rbind(c(1,2,3, 7), c(4, 5, 6, 7)),
widths = c(10, 10, 10, 3),
nrow = 2, ncol = 4)
# jpeg(file.path(res_folder, "WPD_PC4.jpg"),
# height = 6, width = 8, res = 300, units = "in")
grid.arrange(wi.plot_pc1+theme(legend.position = "none"),
wi.plot_pc2+theme(legend.position = "none"),
wi.plot_pc3+theme(legend.position = "none"),
wi.plot_pc4+theme(legend.position = "none"),
legend_wiPC,
layout_matrix = rbind(c(1,2,5), c(3,4,5)),
widths = c(10, 10, 3),
nrow = 2, ncol = 3)
# dev.off()
PC1: Individuals with high scores on PC1 have a weaker minimal constriction at post compared to pre. Individuals with low scores on PC1 have a stronger minimal constriction at post compared to pre.
PC2: Individuals with high loading on PC2 have a weaker initial constriction at post and a stronger rebound dilation past the 200th frame. Individuals with a low loading on PC2 have a stronger initial constriction at paost and a weaker rebound dilation past the 200th frame.
wi.scores <- as.data.frame(right_0_wi.fpca$scores)
names(wi.scores) <- c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6")
wi.scores$subject_id <- rownames(wi.scores)
pt.wi.scores <- merge(wi.scores, pt.df,
by = "subject_id",
all.x = T)
pt.wi.scores$smoker <- ifelse(pt.wi.scores$user_cat %in% c("daily", "occasional"),1, 0)
# pc_ind_plots <- function(scores.df, raw.df, pc_plot.df, q, pc= "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change"){
# q.pc <- quantile(scores.df[, pc], probs = q)
# q.ids <- scores.df[scores.df[, pc] > q.pc, id]
#
# q.df <- pc_plot.df[pc_plot.df[, id] %in% q.ids, ]
#
# colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
#
# print(ggplot(data = pc_plot.df, aes(x=frame, y = mu))+
# geom_line(aes(color = "Pop.Mean"))+
# geom_line(aes(x = frame, y = mu+ pc_plot.df[, 2+pc.val], color = "+2SD"))+
# geom_line(aes(x = frame, y = mu- pc_plot.df[, 2+pc.val], color = "-2SD"))+
# labs(x = "frame",
# y = "Percent Change",
# color = "Legend",
# title = paste0("Individuals in the ", q*100,"th Percentile of PC1 scores"))+
# geom_line(data = q.df, aes(x=frame, y = pc_plot.var, group = id))+
# #scale_color_manual(values = colors)+
# theme_bw())
#
# for(i in q.ids){
# print(ggplot(data = raw.df[raw.df[, id] == i, ],
# aes(x = frame, y = raw.plot.var, group = tp, color = tp))+
# geom_line()+
# labs(title = paste0(i, " Pre/Post: ", q*100 , "th Percentile PC1 scores"))+
# theme_bw())
# }
# }
# pc_ind_plots(scores.df = wi.scores,
# raw.df = right_0.df,
# pc_plot.df = right_0.pt,
# q = 0.95, pc = "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change")
q.95 <- quantile(wi.scores$PC1, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC1 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC1 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame/30, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
for(i in pctile95.wi){
print(ggplot(data = right_0.post[right_0.post$subject_id == i, ],
aes(x = seconds, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC1 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC1, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC1 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC1 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame/30, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
for(i in pctile05.wi){
print(ggplot(data = right_0.post[right_0.post$subject_id == i, ],
aes(x = seconds, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC1 scores"))+
theme_bw())
}
q.95 <- quantile(wi.scores$PC2, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC2 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC2 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame/30, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
for(i in pctile95.wi){
print(ggplot(data = right_0.post[right_0.post$subject_id == i, ],
aes(x = seconds, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC2 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC2, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC2 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC2 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame/30, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
for(i in pctile05.wi){
print(ggplot(data = right_0.post[right_0.post$subject_id == i, ],
aes(x = seconds, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC2 scores"))+
theme_bw())
}
q.95 <- quantile(wi.scores$PC3, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC3 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC3 scores")+
geom_line(data = pctile95.wi.df, aes(x=seconds, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
for(i in pctile95.wi){
print(ggplot(data = right_0.post[right_0.post$subject_id == i, ],
aes(x = seconds, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC3 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC3, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC3 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = seconds, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = seconds, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = seconds, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC3 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame/30, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
for(i in pctile05.wi){
print(ggplot(data = right_0.post[right_0.post$subject_id == i, ],
aes(x = seconds, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC3 scores"))+
theme_bw())
}
### INVESTIGATE BUMPS IN MEAN Function
## Pre/Post Across subjects
ggplot(mean_fxn.l, aes(x=frame/30, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
xlim(50, 200)+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category",
x = "seconds")+
theme_bw()
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "pre2", 103:105]
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "post", 101:103]
## Within person plots
ggplot(mean_fxn.pt.l, aes(x=frame/30, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
xlim(50, 200)+
labs(title ="Average Within subject difference in percent change by THC use category",
x = "seconds")+
theme_bw()
## Mean function Spike in Occassional user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "occasional", 113:119]
right_0.pt.w[right_0.pt.w$user_cat == "occasional", 116:119]
## Mean function Spike in Daily user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "daily", 133:149]
right_0.pt.w[right_0.pt.w$user_cat == "daily", 143:146]
Jagged changes in the mean function lines seems to stem from missing data at those frames in the data set. The subjects per user-group is between 25 - 30, so missing data for one individual has a noticeable effect on mean function line.
## png
## 2
auc.df$AUC <- round(as.numeric(auc.df$AUC), 3)
knitr::kable(auc.df)
| Label | AUC |
|---|---|
| Post AUC PCs | 0.681 |
| Post AUC Scalar Feat | 0.675 |
| W/I Diff AUC PCS | 0.710 |
| W/I Diff AUC Scalar Feat | 0.681 |
AUC Inference
Vr_10 <- function(nN, srpi, srnj){
Vr_10i <- vector(mode = "numeric", length = length(srpi))
for(i in 1:length(srpi)){
Vr_10i[i] = (1/nN)*(sum(srpi[i] > srnj)+ 0.5*sum(srpi[i] == srnj))
}
return(Vr_10i)
}
Vr_01 <- function(pN, srpi, srnj){
Vr_01i <- vector(mode = "numeric", length = length(srnj))
for(i in 1:length(srnj)){
Vr_01i[i] = (1/pN)*(sum(srnj[i] < srpi)+ 0.5*sum(srnj[i] == srpi))
}
return(Vr_01i)
}
w10 <- function(nP, v10_r, auc_r, v10_s, auc_s){
diff_r <- vector(mode = "numeric", length = length(v10_r))
diff_s <- vector(mode = "numeric", length = length(v10_s))
for(i in 1:length(v10_r)){
diff_r[i] <- v10_r[i] - auc_r
diff_s[i] <- v10_s[i] - auc_r
}
res <- (nP-1)^(-1)* sum(diff_r * diff_s)
return(res)
}
w01 <- function(nN, v01_r, auc_r, v01_s, auc_s){
diff_r <- vector(mode = "numeric", length = length(v01_r))
diff_s <- vector(mode = "numeric", length = length(v01_s))
for(i in 1:length(v01_r)){
diff_r[i] <- v01_r[i] - auc_r
diff_s[i] <- v01_s[i] - auc_r
}
res <- (nN-1)^(-1)* sum(diff_r * diff_s)
return(res)
}
# w10_postPC_postSF <- w10(nP = sum(auc_inference$smoker == 1),
# v10_r = V_postPC_10,
# auc_r = auc.df$AUC[auc.df$Label == "Post AUC PCs"],
# v10_s = V_postSF_10,
# auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])
#
# w01_postPC_postSF <- w01(nN = sum(auc_inference$smoker == 0),
# v01_r = V_postPC_01,
# auc_r = auc.df$AUC[auc.df$Label == "Post AUC PCs"],
# v01_s = V_postSF_01,
# auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])
w_rs <- function(nP, nN, srpi_r, srnj_r, srpi_s, srnj_s, auc_r, auc_s){
v10_r <- Vr_10(nN, srpi_r, srnj_r)
v01_r <- Vr_01(pN=nP, srpi_r, srnj_r)
v10_s <- Vr_10(nN, srpi_s, srnj_s)
v01_s <- Vr_01(pN=nP, srpi_s, srnj_s)
w10_rs <- w10(nP, v10_r, auc_r, v10_s, auc_s)
w01_rs <- w01(nN, v01_r, auc_r, v01_s, auc_s)
res <- (nP)^(-1)*w10_rs + (nN)^(-1)*w01_rs
return(res)
}
w_postPC_postPC <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$post_PC_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$post_PC_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC PCs"],
auc_s = auc.df$AUC[auc.df$Label == "Post AUC PCs"])
w_postSF_postSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])
w_wpdPC_wpdPC <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"])
w_wpdSF_wpdSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])
w_postPC_postSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC PCs"],
auc_s = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])
w_postPC_wpdPC <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC PCs"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"])
w_postPC_wpdSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_PC_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC PCs"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])
w_postSF_wpdPC <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"])
w_postSF_wpdSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$post_SF_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])
w_wpdPC_wpdSF <- w_rs(nP = sum(auc_inference$smoker == 1),
nN = sum(auc_inference$smoker == 0),
srpi_r = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 1],
srnj_r = auc_inference$wpd_PC_aucScores[auc_inference$smoker == 0],
srpi_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 1],
srnj_s = auc_inference$wpd_SF_aucScores[auc_inference$smoker == 0],
auc_r = auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"],
auc_s = auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])
z_postPC_postSF <- (auc.df$AUC[auc.df$Label == "Post AUC PCs"] - auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"])/
(sqrt(w_postPC_postPC + w_postSF_postSF - w_postPC_postSF))
z_postPC_wpdPC <- (auc.df$AUC[auc.df$Label == "Post AUC PCs"] - auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"])/
(sqrt(w_postPC_postPC + w_wpdPC_wpdPC - w_postPC_wpdPC))
z_postPC_wpdSF <- (auc.df$AUC[auc.df$Label == "Post AUC PCs"] - auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])/
(sqrt(w_postPC_postPC + w_wpdSF_wpdSF - w_postPC_wpdSF))
z_postSF_wpdPC <- (auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"] - auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"])/
(sqrt(w_postSF_postSF + w_wpdPC_wpdPC - w_postSF_wpdPC))
z_postSF_wpdSF <- (auc.df$AUC[auc.df$Label == "Post AUC Scalar Feat"] - auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])/
(sqrt(w_postSF_postSF + w_wpdSF_wpdSF - w_postSF_wpdSF))
z_wpdPC_wpdSF <- (auc.df$AUC[auc.df$Label == "W/I Diff AUC PCS"] - auc.df$AUC[auc.df$Label == "W/I Diff AUC Scalar Feat"])/
(sqrt(w_wpdSF_wpdSF + w_wpdPC_wpdPC - w_wpdPC_wpdSF))
auc_compare <- data.frame("Test Desciption" = c("AUC-postPC v AUC-postSF",
"AUC-postPC v AUC-wpdPC",
"AUC-postPC v AUC-wpdSF",
"AUC-postSF v AUC-wpdPC",
"AUC-postSF v AUC-wpdPC",
"AUC-wpdPC v AUC-wpdSF"),
"z" = c(round(z_postPC_postSF, 3),
round(z_postPC_wpdPC, 3),
round(z_postPC_wpdSF, 3),
round(z_postSF_wpdPC, 3),
round(z_postSF_wpdSF, 3),
round(z_wpdPC_wpdSF, 3)))
auc_compare$p <- round((1-pnorm(abs(auc_compare$z)))*2, 3)
knitr::kable(auc_compare)
| Test.Desciption | z | p |
|---|---|---|
| AUC-postPC v AUC-postSF | 0.080 | 0.936 |
| AUC-postPC v AUC-wpdPC | -0.351 | 0.726 |
| AUC-postPC v AUC-wpdSF | 0.000 | 1.000 |
| AUC-postSF v AUC-wpdPC | -0.440 | 0.660 |
| AUC-postSF v AUC-wpdPC | -0.074 | 0.941 |
| AUC-wpdPC v AUC-wpdSF | 0.415 | 0.678 |
pt.scores$male <- ifelse(pt.scores$demo_gender == "Male", 1, 0)
pt.scores$non_user <- ifelse(pt.scores$user_cat == "non-user", 1, 0)
pt.scores$daily <- ifelse(pt.scores$user_cat == "daily", 1, 0)
pt.scores$occasional <- ifelse(pt.scores$user_cat == "occasional", 1, 0)
pt.wi.scores$male <- ifelse(pt.wi.scores$demo_gender == "Male", 1, 0)
pt.wi.scores$non_user <- ifelse(pt.wi.scores$user_cat == "non-user", 1, 0)
pt.wi.scores$daily <- ifelse(pt.wi.scores$user_cat == "daily", 1, 0)
pt.wi.scores$occasional <- ifelse(pt.wi.scores$user_cat == "occasional", 1, 0)
Compare PCs to Scalar Features
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"min_constriction.post", "duration.post",
"avg_end_percent.post", "slope.post", "AUC.post")])
Compare PCs to demog Variables
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
Compare Scalar Features to demog variables
chart.Correlation(pt.scores[, c("min_constriction.post", "duration.post",
"avg_end_percent.post", "slope.post", "AUC.post",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
post.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6845 -4.3705 -0.2025 3.0915 12.7315
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.011107 0.540577 59.217 <2e-16 ***
## PC1 -0.004604 0.003968 -1.161 0.2493
## PC2 0.021247 0.011155 1.905 0.0605 .
## PC3 -0.016639 0.017201 -0.967 0.3363
## PC4 -0.014717 0.025677 -0.573 0.5682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.954 on 79 degrees of freedom
## Multiple R-squared: 0.07418, Adjusted R-squared: 0.0273
## F-statistic: 1.582 on 4 and 79 DF, p-value: 0.1872
post.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.326 -29.368 -5.404 21.755 104.387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.66399 4.21253 40.751 <2e-16 ***
## PC1 -0.01098 0.03092 -0.355 0.723
## PC2 0.11151 0.08692 1.283 0.203
## PC3 -0.11148 0.13404 -0.832 0.408
## PC4 -0.26579 0.20009 -1.328 0.188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.61 on 79 degrees of freedom
## Multiple R-squared: 0.05157, Adjusted R-squared: 0.003548
## F-statistic: 1.074 on 4 and 79 DF, p-value: 0.3751
post.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6843 -2.8328 -0.1437 2.5709 9.4686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.695465 0.459844 149.389 <2e-16 ***
## PC1 -0.002579 0.003375 -0.764 0.447
## PC2 -0.007534 0.009489 -0.794 0.430
## PC3 -0.002316 0.014632 -0.158 0.875
## PC4 -0.015311 0.021842 -0.701 0.485
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.214 on 79 degrees of freedom
## Multiple R-squared: 0.02152, Adjusted R-squared: -0.02802
## F-statistic: 0.4344 on 4 and 79 DF, p-value: 0.7834
post.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.507 -3.132 -0.392 2.565 8.977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.4085955 0.4696584 54.100 <2e-16 ***
## PC1 -0.0002697 0.0034470 -0.078 0.9378
## PC2 0.0214059 0.0096912 2.209 0.0301 *
## PC3 -0.0104595 0.0149447 -0.700 0.4861
## PC4 -0.0366002 0.0223084 -1.641 0.1048
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.304 on 79 degrees of freedom
## Multiple R-squared: 0.09332, Adjusted R-squared: 0.04741
## F-statistic: 2.033 on 4 and 79 DF, p-value: 0.09779
post.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.731 -11.175 -8.500 -0.014 111.024
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.89476 2.45021 4.855 6.08e-06 ***
## PC1 0.01352 0.01788 0.756 0.452
## PC2 -0.01683 0.05026 -0.335 0.739
## PC3 0.06726 0.07774 0.865 0.390
## PC4 -0.05451 0.11573 -0.471 0.639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.32 on 78 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02048, Adjusted R-squared: -0.02975
## F-statistic: 0.4077 on 4 and 78 DF, p-value: 0.8026
post.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3643 -3.2540 -0.4397 2.7332 19.2217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.615735 0.792896 78.971 <2e-16 ***
## PC1 -0.007264 0.006659 -1.091 0.281
## PC2 0.029166 0.018235 1.599 0.116
## PC3 -0.022868 0.031685 -0.722 0.474
## PC4 0.025181 0.039465 0.638 0.526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.544 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.08165, Adjusted R-squared: 0.008178
## F-statistic: 1.111 on 4 and 50 DF, p-value: 0.3616
post.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.scores)
summary(post.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1788 -1.1988 0.7545 1.0495 1.3019
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.378168 0.231748 1.632 0.103
## PC1 -0.002916 0.001838 -1.586 0.113
## PC2 -0.005282 0.005524 -0.956 0.339
## PC3 -0.003360 0.007911 -0.425 0.671
## PC4 -0.015126 0.011813 -1.280 0.200
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 114.10 on 83 degrees of freedom
## Residual deviance: 108.65 on 79 degrees of freedom
## AIC: 118.65
##
## Number of Fisher Scoring iterations: 4
Compare PCs to Scalar Features
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg")])
Compare PCs to demog Variables
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
Compare Scalar Features to demog variables
chart.Correlation(pt.wi.scores[, c("min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
wi.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9988 -3.4891 -0.7743 2.7927 13.5604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.002624 0.548036 58.395 <2e-16 ***
## PC1 0.001966 0.003700 0.531 0.597
## PC2 -0.009954 0.007303 -1.363 0.177
## PC3 -0.007683 0.010012 -0.767 0.445
## PC4 0.015526 0.013419 1.157 0.251
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.022 on 79 degrees of freedom
## Multiple R-squared: 0.0488, Adjusted R-squared: 0.0006381
## F-statistic: 1.013 on 4 and 79 DF, p-value: 0.4057
wi.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -89.567 -25.100 -3.635 19.597 98.808
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.56514 4.10429 41.801 <2e-16 ***
## PC1 0.05566 0.02771 2.009 0.048 *
## PC2 -0.06805 0.05469 -1.244 0.217
## PC3 -0.08688 0.07498 -1.159 0.250
## PC4 0.13802 0.10049 1.373 0.174
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.61 on 79 degrees of freedom
## Multiple R-squared: 0.1, Adjusted R-squared: 0.05444
## F-statistic: 2.195 on 4 and 79 DF, p-value: 0.07709
wi.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0426 -3.2507 0.1108 2.2040 8.9792
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.677130 0.454452 151.121 <2e-16 ***
## PC1 0.002125 0.003068 0.693 0.491
## PC2 -0.006224 0.006056 -1.028 0.307
## PC3 -0.004791 0.008302 -0.577 0.566
## PC4 0.015086 0.011127 1.356 0.179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.164 on 79 degrees of freedom
## Multiple R-squared: 0.04468, Adjusted R-squared: -0.003688
## F-statistic: 0.9238 on 4 and 79 DF, p-value: 0.4545
wi.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7935 -2.7105 -0.9652 2.5352 8.8133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.406031 0.473423 53.665 <2e-16 ***
## PC1 0.006406 0.003196 2.004 0.0484 *
## PC2 -0.005772 0.006309 -0.915 0.3630
## PC3 -0.009432 0.008649 -1.091 0.2788
## PC4 0.010097 0.011592 0.871 0.3864
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.338 on 79 degrees of freedom
## Multiple R-squared: 0.07906, Adjusted R-squared: 0.03243
## F-statistic: 1.696 on 4 and 79 DF, p-value: 0.1593
wi.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.899 -11.063 -6.854 -0.935 109.588
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.824855 2.423977 4.878 5.54e-06 ***
## PC1 0.023979 0.016379 1.464 0.147
## PC2 0.013853 0.032118 0.431 0.667
## PC3 0.045109 0.044151 1.022 0.310
## PC4 -0.006963 0.059020 -0.118 0.906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.08 on 78 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.04183, Adjusted R-squared: -0.007308
## F-statistic: 0.8513 on 4 and 78 DF, p-value: 0.4971
wi.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6270 -3.2816 -0.2987 2.3024 20.4466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.778834 0.783715 78.828 <2e-16 ***
## PC1 0.003755 0.005617 0.668 0.507
## PC2 0.011788 0.011540 1.022 0.312
## PC3 0.024460 0.014610 1.674 0.100
## PC4 -0.005263 0.017605 -0.299 0.766
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.554 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.07836, Adjusted R-squared: 0.004631
## F-statistic: 1.063 on 4 and 50 DF, p-value: 0.3847
wi.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.wi.scores)
summary(wi.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6262 -1.1842 0.6667 1.0082 1.6770
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3857856 0.2349240 1.642 0.1006
## PC1 0.0025223 0.0016333 1.544 0.1225
## PC2 -0.0068811 0.0035516 -1.937 0.0527 .
## PC3 -0.0063244 0.0047521 -1.331 0.1832
## PC4 -0.0002728 0.0059838 -0.046 0.9636
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 114.10 on 83 degrees of freedom
## Residual deviance: 106.34 on 79 degrees of freedom
## AIC: 116.34
##
## Number of Fisher Scoring iterations: 4
Demographic Variables:
Results:
Regression models regressed demographic variables on the 4 PCs for the post data.
Regression models regressed demographic variables on the 6 PCs for the WPD data.
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t)*I(user = occasional) + f_2(t)*I(user = daily) + b_i(t) + \epsilon_i(t)\\ & b_i(t) \sim GP(0, \Sigma_b) \\ & \epsilon_i(t) \sim N(0, \sigma_{\epsilon}^2) \\ &= \sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)*I(user = occasional) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i)*I(user = daily) + \sum_{k=1}^K \xi_{ik}\phi_k(t_i) + \epsilon_i(t)\\ \end{aligned} \]
\[ \begin{aligned} Y_{i}(t) &= f_0(t)\\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) \\ &= \Phi_0\xi_0 \end{aligned} \]
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)\\ &= \Phi_0\xi_0 + \Phi_1\xi_1\\ &= [ \Phi_0, \Phi_1 ] \xi \end{aligned} \]
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_2(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i) \\ &= \Phi_0\xi_0 + \Phi_2\xi_2\\ &= [ \Phi_0, \Phi_2 ] \xi \end{aligned} \]
pt.analytic.df <- pt.df[pt.df$subject_id %in% analytic.N, ]
pt.analytic.df$occasional <- ifelse(pt.analytic.df$user_cat == "occasional", 1, 0)
pt.analytic.df$daily <- ifelse(pt.analytic.df$user_cat == "daily", 1, 0)
pt.analytic.df <- pt.analytic.df[order(pt.analytic.df$subject_id), ]
right_0.post.w <- right_0.post.w[order(right_0.post.w$subject_id), ]
post_pffr.df <- data.frame("subject_id" = as.factor(pt.analytic.df$subject_id),
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"pct_chg" = I(as.matrix(right_0.post.w[, c(4:404)])))
m.post_pffr <- pffr(pct_chg ~ occ_user + daily_user +s(subject_id, bs="re"),
data = post_pffr.df,
algorithm = "bam",
method = "fREML",
discrete = T)
summary(m.post_pffr)
par(mfrow = c(1, 3))
plot(m.post_pffr)
What does NA in the output mean? Did not find NA in either the matrix or other variables
# head(right_0.df)
right_0.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
# right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
right_0.gam.df$sind <- right_0.gam.df$frame/400
# head(right_0.gam.df)
m.post_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr"),
data = right_0.gam.df, method = "REML")
## Create a matrix of residuals
post_gam.resid <- cbind(right_0.gam.df[!(is.na(right_0.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_gam$residuals)
names(post_gam.resid) <- c("subject_id", "frame", "resid")
# post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
right_0.gam.df <- merge(right_0.gam.df, Phi_mat,
by = "frame",
all.x = T)
right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
right_0.gam.df$subject_id <- as.factor(right_0.gam.df$subject_id)
post_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = right_0.gam.df, discrete = TRUE)
summary(post_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2,
## bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id,
## by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3125 0.9197 -11.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.54 28.43 115.67 <2e-16 ***
## s(sind):occasional 18.63 21.95 70.14 <2e-16 ***
## s(sind):daily 18.62 21.95 35.42 <2e-16 ***
## s(subject_id):Phi1 82.23 84.00 23472.92 <2e-16 ***
## s(subject_id):Phi2 81.68 84.00 2775.02 <2e-16 ***
## s(subject_id):Phi3 82.23 84.00 887.87 <2e-16 ***
## s(subject_id):Phi4 81.05 84.00 1139.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.966 Deviance explained = 96.6%
## fREML = 63334 Scale est. = 2.6314 n = 32642
post_gam.coef <- post_gam.fri$coefficients
post_gam.cov <- vcov.gam(post_gam.fri)
df_pred_non <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_non <- predict(post_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_occ <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 1, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_occ <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")
df_pred_dly <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_dly <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 3)/30,
user = c(rep("non-user", 401), rep("occasional", 401),rep("daily", 401)),
mean = c(lpmat_non %*% post_gam.coef,
lpmat_occ %*% post_gam.coef, lpmat_dly %*% post_gam.coef),
se = c(sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_occ %*% post_gam.cov %*% t(lpmat_occ))),
sqrt(diag(lpmat_dly %*% post_gam.cov %*% t(lpmat_dly)))),
stringsAsFactors = F)
post_userProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent Change")+
theme_bw()
legend_userProfile <- g_legend(post_userProfile)
post_userProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
labs(y = "")+
theme_bw()
post_userTraj <- grid.arrange(arrangeGrob(post_userProfile+theme(legend.position = "none"),
post_userProfile.se+theme(legend.position = "none"), nrow = 1),
legend_userProfile, nrow = 1,
widths = c(30, 6))
pred_f1 <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "terms")
pred_f2 <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
f0_hat = lpmat_non %*% post_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2],
f2_hat = pred_f2$fit[, 3],
f2_se = pred_f2$se.fit[, 3])
post_non_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f0_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f0_hat + 2*f0_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f0_hat - 2*f0_se), linetype = "longdash")+
labs(title = "Average Percent Change of a Non-user (Post)", ylab = "Percent Change")+
theme_bw()
post_occ_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Occasional and Non-user (Post)",
y= "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
post_dly_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f2_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f2_hat + 2*f2_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2_hat - 2*f2_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Daily and Non-user (Post)",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_occ_coef <- grid.arrange(ylab, posval, negval, post_occ_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
post_dly_coef <- grid.arrange(ylab, posval, negval, post_dly_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
Plot difference between daily and occasional user
f2_f1 <- (lpmat_dly - lpmat_occ) %*% post_gam.coef
f2_f1.se <- sqrt(diag((lpmat_dly - lpmat_occ) %*% post_gam.cov %*% t((lpmat_dly - lpmat_occ))))
f2_f1_diff_df <- data.frame(seconds = seq(0, 400)/30,
f2f1_diff = f2_f1,
f2f1_se = f2_f1.se)
ggplot(data = f2_f1_diff_df, aes(x=seconds, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = seconds, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
labs(title = "Difference between Average Daily vs Occasional User", ylab = "f2_hat - f1_hat")+
theme_bw()
post_dlyocc_plt <- ggplot(data = f2_f1_diff_df, aes(x=seconds, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = seconds, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Daily vs Occasional User",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in Daily User", width = 15,
simplify = F),
paste, collapse = "\n"), rot = 0, just = "bottom",
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in Daily User", width = 15,
simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_dlyocc_coef <- grid.arrange(ylab, posval, negval, post_dlyocc_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)),
c(1, 3, rep(4, 15))))
post_dlyocc_coef
## TableGrob (2 x 17) "arrange": 4 grobs
## z cells name grob
## 1 1 ( 1- 2, 1- 1) arrange text[GRID.text.2941]
## 2 2 ( 1- 1, 2- 2) arrange text[GRID.text.2942]
## 3 3 ( 2- 2, 2- 2) arrange text[GRID.text.2943]
## 4 4 ( 1- 2, 3-17) arrange gtable[layout]
smoker.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
smoker.gam.df$sind <- smoker.gam.df$frame/400
smoker.gam.df$smoker <- ifelse(smoker.gam.df$user_cat == "non-user", 0, 1)
m.post_smoker_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr"),
data = smoker.gam.df, method = "REML")
## Create a matrix of residuals
post_smoker_gam.resid <- cbind(smoker.gam.df[!(is.na(smoker.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_smoker_gam$residuals)
names(post_smoker_gam.resid) <- c("subject_id", "frame", "resid")
# post_smoker_gam.resid$frame_char <- str_pad(post_smoker_gam.resid$frame,
# width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_smoker_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_smoker_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_smoker_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_smoker_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
smoker.gam.df <- merge(smoker.gam.df, Phi_mat,
by = "frame",
all.x = T)
smoker.gam.df <- smoker.gam.df[order(smoker.gam.df$subject_id, smoker.gam.df$frame), ]
smoker.gam.df$subject_id <- as.factor(smoker.gam.df$subject_id)
post_smoker_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = smoker.gam.df, discrete = TRUE)
summary(post_smoker_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker,
## k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3286 0.9645 -10.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.31 28.29 114.24 <2e-16 ***
## s(sind):smoker 19.49 22.78 66.85 <2e-16 ***
## s(subject_id):Phi1 82.65 84.00 24462.37 <2e-16 ***
## s(subject_id):Phi2 82.26 84.00 2954.85 <2e-16 ***
## s(subject_id):Phi3 82.56 84.00 943.48 <2e-16 ***
## s(subject_id):Phi4 81.70 84.00 1213.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.6%
## fREML = 63449 Scale est. = 2.6539 n = 32642
Plot difference between smoker and non-smoker
post_smoker_gam.coef <- post_smoker_gam.fri$coefficients
post_smoker_gam.cov <- vcov.gam(post_smoker_gam.fri)
df_pred_non <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" =0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = smoker.gam.df$subject_id[1])
lpmat_non <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_smk <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = smoker.gam.df$subject_id[1])
lpmat_smk <- predict(post_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 2)/30,
user = c(rep("non-user", 401), rep("smoker", 401)),
mean = c(lpmat_non %*% post_smoker_gam.coef,
lpmat_smk %*% post_smoker_gam.coef),
se = c(sqrt(diag(lpmat_non %*% post_smoker_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_smk %*% post_smoker_gam.cov %*% t(lpmat_smk)))),
stringsAsFactors = F)
post_smkProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "Percent Change")+
theme_bw()
legend_smkProfile <- g_legend(post_smkProfile)
post_smkProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "")+
theme_bw()
post_smk <- grid.arrange(arrangeGrob(post_smkProfile+theme(legend.position = "none"),
post_smkProfile.se+theme(legend.position = "none"), nrow = 1),
legend_smkProfile, nrow = 1,
widths = c(30, 6))
# pred_f0 <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "iterms")
pred_f1 <- predict(post_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
f0_hat = lpmat_non %*% post_smoker_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% post_smoker_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2])
post_smk_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Smoker and Non-smoker",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in Smokers",
width = 15, simplify = F),
paste, collapse = "\n"), rot = 0, just = "bottom",
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in Smokers",
width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_smk_coef <- grid.arrange(ylab, posval, negval, post_smk_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
right_0.pt.w <- right_0.pt.w[order(right_0.pt.w$subject_id), ]
wi_pffr.df <- data.frame("subject_id" = pt.analytic.df$subject_id,
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"wi_pct_chg" = I(as.matrix(right_0.pt.w[, c(3:403)])))
wi_pffr.df$subject_id <- as.factor(wi_pffr.df$subject_id)
m.wi_pffr <- pffr(wi_pct_chg ~ occ_user + daily_user + s(subject_id, bs="re"),
data = wi_pffr.df,
algorithm = "bam",
method = "fREML",
discrete = T)
summary(m.wi_pffr)
par(mfrow = c(1,3))
plot(m.wi_pffr)
right_0.pt.gam.df <- merge(right_0.pt, pt.analytic.df,
by = c("subject_id", "user_cat"))
right_0.pt.gam.df$sind <- right_0.pt.gam.df$frame/400
m.wi_gam <- mgcv::gam(wiPctChg ~ s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr"),
data = right_0.pt.gam.df, method = "REML")
## Create a matrix of residuals
wi_gam.resid <- cbind(right_0.pt.gam.df[!(is.na(right_0.pt.gam.df$wiPctChg)),
c("subject_id", "frame")],
m.wi_gam$residuals)
names(wi_gam.resid) <- c("subject_id", "frame", "resid")
# wi_gam.resid$frame_char <- str_pad(wi_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(wi_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
wi_gam.fpca <- fpca.face(resid_mat, knots = 15)
wi_gam.fpca$evalues/sum(wi_gam.fpca$evalues)
## [1] 0.632623116 0.151378789 0.080436006 0.047798611 0.026647436 0.018360175
## [7] 0.015611564 0.010357839 0.007289294 0.005075438 0.004421731
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(wi_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, wi_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
right_0.pt.gam.df <- merge(right_0.pt.gam.df, Phi_mat,
by = "frame",
all.x = T)
# right_0.pt.gam.df <- right_0.pt.gam.df[order(right_0.pt.gam.df$subject_id,
# right_0.pt.gam.df$frame), ]
right_0.pt.gam.df$subject_id <- as.factor(right_0.pt.gam.df$subject_id)
wi_gam.fri <- mgcv::bam(wiPctChg ~ s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr")+
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re")+
s(subject_id, by = Phi5, bs="re") + s(subject_id, by = Phi6, bs="re"),
method = "fREML", data = right_0.pt.gam.df, discrete = TRUE)
summary(wi_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## wiPctChg ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2,
## bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id,
## by = Phi4, bs = "re") + s(subject_id, by = Phi5, bs = "re") +
## s(subject_id, by = Phi6, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.063 1.037 3.917 8.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 18.74 21.65 27.60 <2e-16 ***
## s(sind):occasional 18.80 21.91 23.24 <2e-16 ***
## s(sind):daily 19.53 22.64 31.40 <2e-16 ***
## s(subject_id):Phi1 81.64 84.00 32110.07 <2e-16 ***
## s(subject_id):Phi2 82.03 84.00 18773.35 <2e-16 ***
## s(subject_id):Phi3 82.79 84.00 5714.16 <2e-16 ***
## s(subject_id):Phi4 82.47 84.00 2915.36 <2e-16 ***
## s(subject_id):Phi5 82.26 84.00 451.05 <2e-16 ***
## s(subject_id):Phi6 82.70 84.00 852.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.954 Deviance explained = 95.5%
## fREML = 71439 Scale est. = 4.5204 n = 32102
wi_gam.coef <- wi_gam.fri$coefficients
wi_gam.cov <- vcov.gam(wi_gam.fri)
df_pred_non <- data.frame("sind" = unique(right_0.pt.gam.df$sind), "occasional" = 0, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0, "Phi5" = 0, "Phi6" = 0,
"subject_id" = right_0.pt.gam.df$subject_id[1])
lpmat_non <- predict(wi_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_occ <- data.frame("sind" = unique(right_0.pt.gam.df$sind), "occasional" = 1, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0, "Phi5" = 0, "Phi6" = 0,
"subject_id" = right_0.pt.gam.df$subject_id[1])
lpmat_occ <- predict(wi_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")
df_pred_dly <- data.frame("sind" = unique(right_0.pt.gam.df$sind), "occasional" = 0, "daily" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0, "Phi5" = 0, "Phi6" = 0,
"subject_id" = right_0.pt.gam.df$subject_id[1])
lpmat_dly <- predict(wi_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 3)/30,
user = c(rep("non-user", 401), rep("occasional", 401),rep("daily", 401)),
mean = c(lpmat_non %*% wi_gam.coef,
lpmat_occ %*% wi_gam.coef, lpmat_dly %*% wi_gam.coef),
se = c(sqrt(diag(lpmat_non %*% wi_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_occ %*% wi_gam.cov %*% t(lpmat_occ))),
sqrt(diag(lpmat_dly %*% wi_gam.cov %*% t(lpmat_dly)))),
stringsAsFactors = F)
# ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
# geom_line()+
# geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
# geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
# theme_bw()
wi_userProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "Within Person Difference in Percent Change")+
theme_bw()
legend_userProfile <- g_legend(wi_userProfile)
wi_userProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "")+
theme_bw()
wi_userTraj <- grid.arrange(arrangeGrob(wi_userProfile+theme(legend.position = "none"),
wi_userProfile.se+theme(legend.position = "none"), nrow = 1),
legend_userProfile, nrow = 1,
widths = c(30, 6))
pred_f1 <- predict(wi_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "terms")
pred_f2 <- predict(wi_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
f0_hat = lpmat_non %*% wi_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% wi_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2],
f2_hat = pred_f2$fit[, 3],
f2_se = pred_f2$se.fit[, 3])
# ggplot(data = pred_coef_df, aes(x=frame, y = f0_hat))+
# geom_line()+
# geom_line(aes(x = frame, y = f0_hat + 2*f0_se), linetype = "longdash")+
# geom_line(aes(x = frame, y = f0_hat - 2*f0_se), linetype = "longdash")+
# labs(title = "Average Response of a Non-user", ylab = "Within Person Difference in Percent Change")+
# theme_bw()
wi_occ_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Within Person Difference (WPD): Occasional and Non-user",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
wi_dly_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f2_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f2_hat + 2*f2_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2_hat - 2*f2_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Within Person Difference (WPD): Daily and Non-user",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in WPD in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("More WPD Constriction in User",
width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("Less WPD Constriction in User",
width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
wi_occ_coef <- grid.arrange(ylab, posval, negval, wi_occ_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
wi_dly_coef <- grid.arrange(ylab, posval, negval, wi_dly_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
This plot compares the difference between occasional and non-users for post and WPD data. In the post data there is significant positive difference in the occasional and non-users from frame 50 - 125 which would indicate that occasional user have less constriction during this time. In the WPD data there is a significant positive difference in the occasional and non-users from frame 10 - 175, indicating a greater difference in the difference between post and pre for the occasional vs non-user. Since the typical response has a positive difference between post and pre during this interval caused by less constriction at post compared to pre, we can say that there is even less constriction in post data of an occasional user compared to his/her pre than in a non-user.
These results are similar to the occassional vs non-user but less pronounced maybe indicating more variability in daily and non-users.
Differences in Within Person Difference between Daily and Occassional Users
Difference in Within Person Difference Smoker vs Non-smokers
wi.smoker.gam.df <- merge(right_0.pt, pt.analytic.df,
by = c("subject_id", "user_cat"))
wi.smoker.gam.df$sind <- wi.smoker.gam.df$frame/400
wi.smoker.gam.df$smoker <- ifelse(wi.smoker.gam.df$user_cat == "non-user", 0, 1)
m.wi_smoker_gam <- mgcv::gam(wiPctChg ~ s(sind, k=15, bs="cr") +
s(sind, by=smoker, k=15, bs = "cr"),
data = wi.smoker.gam.df, method = "REML")
## Create a matrix of residuals
wi_smoker_gam.resid <- cbind(wi.smoker.gam.df[!(is.na(wi.smoker.gam.df$wiPctChg)),
c("subject_id", "frame")],
m.wi_smoker_gam$residuals)
names(wi_smoker_gam.resid) <- c("subject_id", "frame", "resid")
# wi_smoker_gam.resid$frame_char <- str_pad(wi_smoker_gam.resid$frame,
# width = 3, side = "left", pad = "0")
resid_mat <- reshape(wi_smoker_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
wi_smoker_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(wi_smoker_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, wi_smoker_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
wi.smoker.gam.df <- merge(wi.smoker.gam.df, Phi_mat,
by = "frame",
all.x = T)
wi.smoker.gam.df <- smoker.gam.df[order(wi.smoker.gam.df$subject_id, wi.smoker.gam.df$frame), ]
wi.smoker.gam.df$subject_id <- as.factor(wi.smoker.gam.df$subject_id)
wi_smoker_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=15, bs="cr") +
s(sind, by=smoker, k=15, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = wi.smoker.gam.df, discrete = TRUE)
summary(wi_smoker_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 15, bs = "cr") + s(sind, by = smoker,
## k = 15, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8011 1.0111 -0.792 0.428
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 13.42 13.72 208.8 <2e-16 ***
## s(sind):smoker 13.64 14.30 103.1 <2e-16 ***
## s(subject_id):Phi1 83.64 84.00 25787.8 <2e-16 ***
## s(subject_id):Phi2 82.48 84.00 8143.5 <2e-16 ***
## s(subject_id):Phi3 82.98 84.00 1724.7 <2e-16 ***
## s(subject_id):Phi4 81.92 84.00 3190.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.6%
## fREML = 63499 Scale est. = 2.6572 n = 32642
Plot difference between smoker and non-smoker
wi_smoker_gam.coef <- wi_smoker_gam.fri$coefficients
wi_smoker_gam.cov <- vcov.gam(wi_smoker_gam.fri)
df_pred_non <- data.frame("sind" = sort(unique(wi.smoker.gam.df$sind)), "smoker" =0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = wi.smoker.gam.df$subject_id[1])
lpmat_non <- predict(wi_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_smk <- data.frame("sind" = sort(unique(wi.smoker.gam.df$sind)), "smoker" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = wi.smoker.gam.df$subject_id[1])
lpmat_smk <- predict(wi_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 2)/30,
user = c(rep("non-user", 401), rep("smoker", 401)),
mean = c(lpmat_non %*% wi_smoker_gam.coef,
lpmat_smk %*% wi_smoker_gam.coef),
se = c(sqrt(diag(lpmat_non %*% wi_smoker_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_smk %*% wi_smoker_gam.cov %*% t(lpmat_smk)))),
stringsAsFactors = F)
# ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
# geom_line()+
# geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
# geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
# theme_bw()
wi_smkProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "Within Person Difference in Percent Change")+
theme_bw()
legend_smkProfile <- g_legend(wi_smkProfile)
wi_smkProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "")+
theme_bw()
wi_smk <- grid.arrange(arrangeGrob(wi_smkProfile+theme(legend.position = "none"),
wi_smkProfile.se+theme(legend.position = "none"), nrow = 1),
legend_smkProfile, nrow = 1,
widths = c(30, 6))
# pred_f0 <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "iterms")
pred_f1 <- predict(wi_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
f0_hat = lpmat_non %*% wi_smoker_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% wi_smoker_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2])
# ggplot(data = pred_coef_df, aes(x=frame, y = f0_hat))+
# geom_line()+
# geom_line(aes(x = frame, y = f0_hat + 2*f0_se), linetype = "longdash")+
# geom_line(aes(x = frame, y = f0_hat - 2*f0_se), linetype = "longdash")+
# labs(title = "Average Within Person Difference in Percent Change of a Non-user", ylab = "f0_hat")+
# theme_bw()
wi_smk_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Within Person Difference (WPD) in Percent Change \nbetween Smoker and Non-smoker",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in WPD in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("More WPD Constriction in User",
width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("Less WPD Constriction in User",
width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
wi_smk_coef <- grid.arrange(ylab, posval, negval, wi_smk_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
smk.df <- pt.analytic.df[, c("subject_id", "user_cat")]
smk.df$smoker <- ifelse(pt.analytic.df$user_cat %in% c("daily", "occasional"), 1, 0)
# Zsm <- post_right_0.fpca$Yhat
pctChg_names <- names(right_0.post.w)[grepl("percent_change", names(right_0.post.w)) == T]
### Truncate to 350 frames
frame_max = 350
pctChg_names_350 <- pctChg_names[as.numeric(substr(pctChg_names, 16, 18)) <= frame_max]
sofr_right_post <- right_0.post.w[, pctChg_names_350]
ncols = length(pctChg_names_350)
sind <- seq(0, 1, len = ncols)
smat <- matrix(sind, nrow(smk.df), ncols, byrow = T)
# smk.df$Zsm <- I(Zsm)
smk.df$Zraw <- I(as.matrix(sofr_right_post))
smk.df$smat <- I(smat)
smk.df$lmat <- I(matrix(1/ncols, nrow(smk.df), ncols))
smk.df$zlmat <- I(smk.df$lmat * smk.df$Zraw)
smk_fglm_ps <- gam(smoker ~ s(smat, by=zlmat, bs = "cr", k = 30), data=smk.df,
method = "REML", family = binomial)
summary(smk_fglm_ps)
##
## Family: binomial
## Link function: logit
##
## Formula:
## smoker ~ s(smat, by = zlmat, bs = "cr", k = 30)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0928 0.6225 1.756 0.0792 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(smat):zlmat 5.157 5.939 9.592 0.114
##
## R-sq.(adj) = 0.123 Deviance explained = 14.6%
## -REML = 50.035 Scale est. = 1 n = 79
roc_in_sample <- roc(smk_fglm_ps$model$smoker, smk_fglm_ps$fitted.values)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(roc_in_sample)
## Area under the curve: 0.7379
df_sofr <- data.frame("smat" = sind, "zlmat" = 1)
sofr_pred <- predict(smk_fglm_ps, newdata = df_sofr, , se.fit = TRUE, type = "iterms")
plot.sofr <- data.frame("frame" = 0:(ncols -1),
"f0" = sofr_pred$fit[, 1],
'f0.se' = sofr_pred$se.fit[, 1])
ggplot(data = plot.sofr, aes(x=frame, y = exp(f0)))+
geom_line()+
geom_line(aes(x=frame, y = exp(f0-2*f0.se)), linetype = 'longdash')+
geom_line(aes(x=frame, y = exp(f0+2*f0.se)), linetype = 'longdash')+
geom_hline(yintercept = 1, color = "red", linetype = 3)+
labs(title = "Odd Ratio of being a smoker vs non-smoker over the test duration",
x = "frame")+
theme_bw()
ggplot(data = plot.sofr, aes(x=frame/300, y = f0))+
geom_line()+
geom_line(aes(x=frame/300, y = f0-2*f0.se), linetype = 'longdash')+
geom_line(aes(x=frame/300, y = f0+2*f0.se), linetype = 'longdash')+
geom_hline(yintercept = 1, color = "red", linetype = 3)+
labs(title = "Odd Ratio of being a smoker vs non-smoker over the test duration",
x = "seconds")+
theme_bw()
sofr_impute_df <- right_0.post.w[, pctChg_names]
rownames(right_0.post.w$subject_id)
## NULL
sofr_fpca <- fpca.face(as.matrix(sofr_impute_df))$Yhat
names(sofr_fpca) <- names(sofr_impute_df)
sofr_imputed <- sofr_impute_df
sofr_imputed[is.na(sofr_imputed)] <- sofr_fpca[is.na(sofr_imputed)]
ncols = ncol(sofr_imputed)
sind <- seq(0, 1, len = ncols)
smat <- matrix(sind, nrow(smk.df), ncols, byrow = T)
# smk.df$Zsm <- I(Zsm)
smk.df$Zraw <- I(as.matrix(sofr_imputed))
smk.df$smat <- I(smat)
smk.df$lmat <- I(matrix(1/ncols, nrow(smk.df), ncols))
smk.df$zlmat <- I(smk.df$lmat * smk.df$Zraw)
smk_fglm_ps2 <- gam(smoker ~ s(smat, by=zlmat, bs = "cr", k = 30), data=smk.df,
method = "REML", family = binomial)
summary(smk_fglm_ps2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## smoker ~ s(smat, by = zlmat, bs = "cr", k = 30)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3731 0.5711 2.404 0.0162 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(smat):zlmat 4.79 5.463 10.78 0.105
##
## R-sq.(adj) = 0.117 Deviance explained = 13.1%
## -REML = 52.945 Scale est. = 1 n = 84
roc_in_sample2 <- roc(smk_fglm_ps2$model$smoker, smk_fglm_ps2$fitted.values)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(roc_in_sample2)
## Area under the curve: 0.7135
hist(pt.analytic.df$vas1_high_score[pt.analytic.df$user_cat == "daily"], breaks = 15)
hist(pt.analytic.df$vas1_high_score[pt.analytic.df$user_cat == "occasional"], breaks = 15)
boxplot(vas1_high_score ~ user_cat, data = pt.analytic.df)
summary(pt.analytic.df$vas1_high_score[pt.analytic.df$user_cat == "daily"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.00 42.00 46.00 47.24 57.00 78.00
summary(pt.analytic.df$vas1_high_score[pt.analytic.df$user_cat == "occasional"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 37.25 51.25 48.23 61.12 79.00
by(pt.analytic.df$vas1_high_score, INDICES = pt.analytic.df$user_cat, FUN = summary)
## pt.analytic.df$user_cat: non-user
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
## ------------------------------------------------------------
## pt.analytic.df$user_cat: occasional
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 37.25 51.25 48.23 61.12 79.00
## ------------------------------------------------------------
## pt.analytic.df$user_cat: daily
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.00 42.00 46.00 47.24 57.00 78.00
sum(pt.analytic.df$vas1_high_score[pt.analytic.df$user_cat == "occasional"] >= 46, na.rm = T)
## [1] 17
sum(pt.analytic.df$vas1_high_score[pt.analytic.df$user_cat == "occasional"] < 46, na.rm = T)
## [1] 13
pt.analytic.df$vas_high <- NA
pt.analytic.df$vas_high[pt.analytic.df$vas1_high_score >= 46 &
pt.analytic.df$user_cat == "occasional"] <- 1
pt.analytic.df$vas_high[pt.analytic.df$vas1_high_score < 46 &
pt.analytic.df$user_cat == "occasional"] <- 0
AllSmokers <- pt.analytic.df[pt.analytic.df$user_cat != "non-user", ]
preAllSmokers <- lm(min_constriction.pre2 ~ vas1_high_score, data = AllSmokers)
postAllSmokers <- lm(min_constriction.post ~ vas1_high_score, data = AllSmokers)
chgAllSmokers <- lm(min_constriction_chg ~ vas1_high_score, data = AllSmokers)
pre_vas_minConstrict <- ggplot(data = AllSmokers,
aes(x = vas1_high_score,
y = min_constriction.pre2, col = user_cat))+
geom_point()+
scale_y_continuous(limits = c(-55, 0))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs PRE minimal Constriction")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(preAllSmokers)$coefficients[2, 1], 2)),
x = 3,
y = 0,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(preAllSmokers)$coefficients[2, 4], 2)),
x = 3,
y = -3,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(AllSmokers$min_constriction.pre2,
AllSmokers$vas1_high_score), 3)),
x = 3,
y = -6,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
post_vas_minConstrict <- ggplot(data = AllSmokers,
aes(x = vas1_high_score, y = min_constriction.post, col = user_cat))+
geom_point()+
scale_y_continuous(limits = c(-55, 0))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs POST minimal Constriction")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(postAllSmokers)$coefficients[2, 1], 2)),
x = 3,
y = 0,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(postAllSmokers)$coefficients[2, 4], 2)),
x = 3,
y = -3,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(AllSmokers$min_constriction.post,
AllSmokers$vas1_high_score), 3)),
x = 3,
y = -6,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
chg_vas_minConstrict <- ggplot(data = AllSmokers,
aes(x = vas1_high_score, y = min_constriction_chg, col = user_cat))+
geom_point()+
scale_y_continuous(limits = c(-20, 40))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs Change in minimal Constriction (post-pre)")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(chgAllSmokers)$coefficients[2, 1], 2)),
x = 3,
y = -15,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(chgAllSmokers)$coefficients[2, 4], 2)),
x = 3,
y = -18,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(AllSmokers$min_constriction_chg,
AllSmokers$vas1_high_score), 3)),
x = 3,
y = -21,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
# jpeg(file.path(res_folder, "Scatterplot_VAS_MinConstriction_AllSmokers.jpg"),
# height = 6, width = 23, res = 300, units = "in")
grid.arrange(pre_vas_minConstrict, post_vas_minConstrict , chg_vas_minConstrict, nrow = 1)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# dev.off()
### Occasional Users
OccSmokers <- AllSmokers[AllSmokers$user_cat == "occasional", ]
preOccSmokers <- lm(min_constriction.pre2 ~ vas1_high_score, data = OccSmokers)
postOccSmokers <- lm(min_constriction.post ~ vas1_high_score, data = OccSmokers)
chgOccSmokers <- lm(min_constriction_chg ~ vas1_high_score, data = OccSmokers)
pre_vas_minConstrict_occ <- ggplot(data = OccSmokers,
aes(x = vas1_high_score,
y = min_constriction.pre2))+
geom_point()+
scale_y_continuous(limits = c(-55, 0))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs PRE minimal Constriction \nOccasional Smokers")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(preOccSmokers)$coefficients[2, 1], 2)),
x = 3,
y = 0,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(preOccSmokers)$coefficients[2, 4], 2)),
x = 3,
y = -3,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(OccSmokers$min_constriction.pre2,
OccSmokers$vas1_high_score), 3)),
x = 3,
y = -6,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
post_vas_minConstrict_occ <- ggplot(data = OccSmokers,
aes(x = vas1_high_score, y = min_constriction.post))+
geom_point()+
scale_y_continuous(limits = c(-55, 0))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs POST minimal Constriction \nOccasional Smokers")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(postOccSmokers)$coefficients[2, 1], 2)),
x = 3,
y = 0,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(postOccSmokers)$coefficients[2, 4], 2)),
x = 3,
y = -3,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(OccSmokers$min_constriction.post,
OccSmokers$vas1_high_score), 3)),
x = 3,
y = -6,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
chg_vas_minConstrict_occ <- ggplot(data = OccSmokers,
aes(x = vas1_high_score, y = min_constriction_chg))+
geom_point()+
scale_y_continuous(limits = c(-20, 40))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs Change in minimal Constriction (post-pre) \nOccasional Smokers")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(chgOccSmokers)$coefficients[2, 1], 2)),
x = 3,
y = -15,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(chgOccSmokers)$coefficients[2, 4], 2)),
x = 3,
y = -18,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(OccSmokers$min_constriction_chg,
OccSmokers$vas1_high_score), 3)),
x = 3,
y = -21,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
# jpeg(file.path(res_folder, "Scatterplot_VAS_MinConstriction_Occ.jpg"),
# height = 6, width = 23, res = 300, units = "in")
grid.arrange(pre_vas_minConstrict_occ, post_vas_minConstrict_occ , chg_vas_minConstrict_occ, nrow = 1)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Removed 1 rows containing missing values (geom_point).
# dev.off()
### Daily Users
DlySmokers <- AllSmokers[AllSmokers$user_cat == "daily", ]
preDlySmokers <- lm(min_constriction.pre2 ~ vas1_high_score, data = DlySmokers)
postDlySmokers <- lm(min_constriction.post ~ vas1_high_score, data = DlySmokers)
chgDlySmokers <- lm(min_constriction_chg ~ vas1_high_score, data = DlySmokers)
pre_vas_minConstrict_dly <- ggplot(data = DlySmokers,
aes(x = vas1_high_score,
y = min_constriction.pre2))+
geom_point()+
scale_y_continuous(limits = c(-55, 0))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs PRE minimal Constriction \nDaily Smokers")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(preDlySmokers)$coefficients[2, 1], 2)),
x = 13,
y = 0,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(preDlySmokers)$coefficients[2, 4], 2)),
x = 13,
y = -3,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(DlySmokers$min_constriction.pre2,
DlySmokers$vas1_high_score), 3)),
x = 13,
y = -6,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
post_vas_minConstrict_dly <- ggplot(data = DlySmokers,
aes(x = vas1_high_score, y = min_constriction.post))+
geom_point()+
scale_y_continuous(limits = c(-55, 0))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs POST minimal Constriction \nDaily Smokers")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(postDlySmokers)$coefficients[2, 1], 2)),
x = 13,
y = 0,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(postDlySmokers)$coefficients[2, 4], 2)),
x = 13,
y = -3,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(DlySmokers$min_constriction.post,
DlySmokers$vas1_high_score), 3)),
x = 13,
y = -6,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
chg_vas_minConstrict_dly <- ggplot(data = DlySmokers,
aes(x = vas1_high_score, y = min_constriction_chg))+
geom_point()+
scale_y_continuous(limits = c(-20, 40))+
geom_vline(xintercept = 46,color = "darkred")+
labs(title = "POST VAS Feel High Score vs Change in minimal Constriction (post-pre) \nDaily Smokers")+
geom_smooth(aes(col = NULL), method = "lm", se=FALSE,
color="black", formula = y ~ x)+
geom_label(label = paste(expression("beta == "),
round(summary(chgDlySmokers)$coefficients[2, 1], 2)),
x = 13,
y = -15,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("p =="),
round(summary(chgDlySmokers)$coefficients[2, 4], 2)),
x = 13,
y = -18,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
geom_label(label = paste(expression("rho == "),
round(cor(DlySmokers$min_constriction_chg,
DlySmokers$vas1_high_score), 3)),
x = 13,
y = -21,
color = "black",
label.size = NA,
parse = T,
hjust = 0)+
theme_bw()
# jpeg(file.path(res_folder, "Scatterplot_VAS_MinConstriction_DLY.jpg"),
# height = 6, width = 23, res = 300, units = "in")
grid.arrange(pre_vas_minConstrict_dly, post_vas_minConstrict_dly , chg_vas_minConstrict_dly, nrow = 1)
# dev.off()
right_0.vas.df <- merge(right_0.post, pt.analytic.df[!(is.na(pt.analytic.df$vas_high)), ],
by = c("subject_id", "user_cat"))
right_0.vas.df <- right_0.vas.df[order(right_0.vas.df$subject_id, right_0.vas.df$frame), ]
right_0.vas.df$sind <- right_0.vas.df$frame/400
m.post_vas <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=vas_high, k=30, bs = "cr"),
data = right_0.vas.df, method = "REML")
## Create a matrix of residuals
post_vas.resid <- cbind(right_0.vas.df[!(is.na(right_0.vas.df$percent_change)),
c("subject_id", "frame")],
m.post_vas$residuals)
names(post_vas.resid) <- c("subject_id", "frame", "resid")
post_vas.resid$frame_char <- str_pad(post_vas.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_vas.resid[, c("subject_id", "frame_char", "resid")],
timevar = "frame_char",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
resid_names <- names(resid_mat)
resid_names <- sort(resid_names)
resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_vas.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_vas.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_vas.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
right_0.vas.df <- merge(right_0.vas.df, Phi_mat,
by = "frame",
all.x = T)
right_0.vas.df <- right_0.vas.df[order(right_0.vas.df$subject_id, right_0.vas.df$frame), ]
right_0.vas.df$subject_id <- as.factor(right_0.vas.df$subject_id)
post_vas.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=vas_high, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = right_0.vas.df, discrete = TRUE)
summary(post_vas.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = vas_high,
## k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.961 1.046 -7.608 2.99e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 24.53 26.55 40.798 <2e-16 ***
## s(sind):vas_high 19.26 22.39 6.104 <2e-16 ***
## s(subject_id):Phi1 28.97 30.00 27400.457 <2e-16 ***
## s(subject_id):Phi2 28.80 30.00 6461.315 <2e-16 ***
## s(subject_id):Phi3 28.87 30.00 2011.017 <2e-16 ***
## s(subject_id):Phi4 29.11 30.00 1262.366 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.97 Deviance explained = 97%
## fREML = 19574 Scale est. = 1.6192 n = 11496
post_vas.coef <- post_vas.fri$coefficients
post_vas.cov <- vcov.gam(post_vas.fri)
df_pred_non <- data.frame("sind" = unique(right_0.vas.df$sind), "vas_high" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.vas.df$subject_id[1])
lpmat_non <- predict(post_vas.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_high <- data.frame("sind" = unique(right_0.vas.df$sind), "vas_high" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.vas.df$subject_id[1])
lpmat_high <- predict(post_vas.fri, newdata=df_pred_high, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 2)/30,
user = c(rep(0, 401), rep(1, 401)),
mean = c(lpmat_non %*% post_vas.coef,
lpmat_high %*% post_vas.coef),
se = c(sqrt(diag(lpmat_non %*% post_vas.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_high %*% post_vas.cov %*% t(lpmat_high)))),
stringsAsFactors = F)
pred_df$user <- factor(pred_df$user,
levels = c(0, 1),
labels = c("not high", "high"))
post_vasProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent Change")+
theme_bw()
legend_vasProfile <- g_legend(post_vasProfile)
post_vasProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
labs(y = "")+
theme_bw()
post_vasTraj <- grid.arrange(arrangeGrob(post_vasProfile+theme(legend.position = "none"),
post_vasProfile.se+theme(legend.position = "none"),
nrow = 1),
legend_vasProfile, nrow = 1,
widths = c(30, 6))
right_0.vas.df$vas_high <- factor(right_0.vas.df$vas_high,
levels = c(0,1),
labels = c("not high", "high"))
ggplot(right_0.vas.df, aes(x=seconds, y = percent_change, group = subject_id,
col = vas_high))+
geom_line()+
geom_line(data = pred_df[pred_df$user == "not high",],
aes(x=seconds, y = mean, group = user), color = "darkred", size = 1.2)+
geom_line(data = pred_df[pred_df$user == "high",],
aes(x=seconds, y = mean, group = user), color = "darkblue", size = 1.2)+
theme_bw()
## Warning: Removed 534 row(s) containing missing values (geom_path).
pred_f1 <- predict(post_vas.fri, newdata=df_pred_high, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2])
post_high_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: VAS high vs not (Post)", ylab = "Percent Change")+
theme_bw()
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_high_coef <- grid.arrange(ylab, posval, negval, post_high_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
post_high_coef
## TableGrob (2 x 17) "arrange": 4 grobs
## z cells name grob
## 1 1 ( 1- 2, 1- 1) arrange text[GRID.text.5306]
## 2 2 ( 1- 1, 2- 2) arrange text[GRID.text.5307]
## 3 3 ( 2- 2, 2- 2) arrange text[GRID.text.5308]
## 4 4 ( 1- 2, 3-17) arrange gtable[layout]
right_0.post.w2 <- right_0.post.w
right.names <- names(right_0.post.w2)
right.names <- c(right.names[1:3], paste0("R_", right.names[4:(length(right.names)-1)]), right.names[length(right.names)])
names(right_0.post.w2) <- right.names
left_0.post.w2 <- left_0.post.w
left.names <- names(left_0.post.w2)
left.names <- c(left.names[1:3], paste0("L_", left.names[4:(length(left.names)-1)]), left.names[length(left.names)])
names(left_0.post.w2) <- left.names
botheyes <- merge(right_0.post.w2, left_0.post.w2,
by = c("subject_id", "tp", "user_cat"))
right.corr.names <- right.names[4:(length(right.names)-1)]
left.corr.names <- left.names[4:(length(left.names)-1)]
botheye.corr <- round(cor(botheyes[, right.corr.names[-1]],
botheyes[, left.corr.names[-1]], use = "pairwise.complete.obs"), 3)
corr_df <- data.frame(frame = seq(1, 400, by = 1),
btwEyeCor = diag(botheye.corr))
# get_upper_tri <- function(cormat){
# cormat[lower.tri(cormat)]<- NA
# return(cormat)
# }
#
# upper_tri <- get_upper_tri(botheye.corr)
# library(reshape2)
#
# melted_cor_mat <- melt(botheye.corr)
# names(melted_cor_mat) <- c("RightEye", "LeftEye", "value")
#
# jpeg(file.path(res_folder, "Corr_Left_Right_Eyes.jpg"),
# height = 6, width = 15, res = 300, units = "in")
# ggplot(data = melted_cor_mat, aes(RightEye, LeftEye, fill = value))+
# geom_tile()+
# scale_fill_gradient2(low = "blue", high = "red", mid = "white",
# midpoint = 0, limit = c(-0.2,1), space = "Lab",
# name="Pearson\nCorrelation") +
# theme_bw()+
# theme(axis.text.x = element_blank(),
# axis.text.y = element_blank())+
# coord_fixed()
# dev.off()
ggplot(data = corr_df, aes(frame, btwEyeCor))+
geom_point()+
scale_y_continuous(limits = c(0,1))+
labs(title = "Correlation between eyes -- POST")+
theme_bw()
right_0.pt.w2 <- right_0.pt.w
right.names <- names(right_0.pt.w2)
right.names <- c(right.names[1:2], paste0("R_", right.names[3:(length(right.names)-1)]), right.names[length(right.names)])
names(right_0.pt.w2) <- right.names
left_0.pt.w2 <- left_0.pt.w
left.names <- names(left_0.pt.w2)
left.names <- c(left.names[1:2], paste0("L_", left.names[3:(length(left.names)-1)]), left.names[length(left.names)])
names(left_0.pt.w2) <- left.names
wpd_botheyes <- merge(right_0.pt.w2, left_0.pt.w2,
by = c("subject_id", "user_cat"))
right.corr.names <- right.names[3:(length(right.names)-1)]
left.corr.names <- left.names[3:(length(left.names)-1)]
wpd_botheye.corr <- round(cor(wpd_botheyes[, right.corr.names],
wpd_botheyes[, left.corr.names],
use = "pairwise.complete.obs"), 3)
wpd_corr_df <- data.frame(frame = seq(1, 400, by = 1),
btwEyeCor = diag(wpd_botheye.corr))
ggplot(data = wpd_corr_df, aes(frame, btwEyeCor))+
geom_point()+
scale_y_continuous(limits = c(0,1))+
labs(title = "Correlation between eyes -- WPD")+
theme_bw()
wi.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6, data = pt.wi.scores)
summary(wi.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4 +
## PC5 + PC6, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9565 -3.5339 -0.7514 2.5591 18.6423
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.910985 0.796032 77.775 <2e-16 ***
## PC1 0.004315 0.005670 0.761 0.450
## PC2 0.010014 0.011696 0.856 0.396
## PC3 0.022049 0.014884 1.481 0.145
## PC4 -0.002204 0.017981 -0.123 0.903
## PC5 -0.020716 0.025855 -0.801 0.427
## PC6 0.025848 0.032574 0.794 0.431
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.581 on 48 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.1067, Adjusted R-squared: -0.004993
## F-statistic: 0.9553 on 6 and 48 DF, p-value: 0.4653
post.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3643 -3.2540 -0.4397 2.7332 19.2217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.615735 0.792896 78.971 <2e-16 ***
## PC1 -0.007264 0.006659 -1.091 0.281
## PC2 0.029166 0.018235 1.599 0.116
## PC3 -0.022868 0.031685 -0.722 0.474
## PC4 0.025181 0.039465 0.638 0.526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.544 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.08165, Adjusted R-squared: 0.008178
## F-statistic: 1.111 on 4 and 50 DF, p-value: 0.3616
pt.analytic.df <- merge(pt.analytic.df, pt.wi.scores[, c("subject_id", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6")],
by = "subject_id")
names(pt.analytic.df)[names(pt.analytic.df) == "PC1"] <- "wpd.PC1"
names(pt.analytic.df)[names(pt.analytic.df) == "PC2"] <- "wpd.PC2"
names(pt.analytic.df)[names(pt.analytic.df) == "PC3"] <- "wpd.PC3"
names(pt.analytic.df)[names(pt.analytic.df) == "PC4"] <- "wpd.PC4"
names(pt.analytic.df)[names(pt.analytic.df) == "PC5"] <- "wpd.PC5"
names(pt.analytic.df)[names(pt.analytic.df) == "PC6"] <- "wpd.PC6"
pt.analytic.df <- merge(pt.analytic.df, pt.scores[, c("subject_id", "PC1", "PC2", "PC3", "PC4")],
by = "subject_id")
names(pt.analytic.df)[names(pt.analytic.df) == "PC1"] <- "post.PC1"
names(pt.analytic.df)[names(pt.analytic.df) == "PC2"] <- "post.PC2"
names(pt.analytic.df)[names(pt.analytic.df) == "PC3"] <- "post.PC3"
names(pt.analytic.df)[names(pt.analytic.df) == "PC4"] <- "post.PC4"
table1(~ postConsumpTimeToTest + Post_PreTime|user_cat,
data = pt.analytic.df)
| non-user (N=29) |
occasional (N=30) |
daily (N=25) |
Overall (N=84) |
|
|---|---|---|---|---|
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 63.9 (6.26) | 60.2 (3.78) | 62.2 (5.57) |
| Median [Min, Max] | NA [NA, NA] | 63.5 [54.0, 84.0] | 60.0 [53.0, 67.0] | 62.0 [53.0, 84.0] |
| Missing | 29 (100%) | 0 (0%) | 0 (0%) | 29 (34.5%) |
| Post_PreTime | ||||
| Mean (SD) | 110 (8.90) | 116 (9.72) | 116 (6.58) | 114 (8.97) |
| Median [Min, Max] | 109 [99.0, 131] | 114 [104, 147] | 117 [106, 137] | 112 [99.0, 147] |
by(pt.analytic.df$postConsumpTimeToTest, INDICES = list(pt.analytic.df$user_cat), summary)
## : non-user
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## NA NA NA NaN NA NA 29
## ------------------------------------------------------------
## : occasional
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 54.00 61.25 63.50 63.93 67.00 84.00
## ------------------------------------------------------------
## : daily
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 53.00 58.00 60.00 60.16 62.00 67.00
postConsump.cor <- cor(pt.analytic.df$postConsumpTimeToTest,
pt.analytic.df[, c("demo_age", "demo_weight", "demo_height", "BMI",
"thc.post", "min_constriction.post", "slope.post",
"AUC.post", "thc_chg", "min_constriction_chg",
"AUC_chg", "wpd.PC1", "wpd.PC2", "wpd.PC3",
"wpd.PC4", "wpd.PC5", "wpd.PC6",
"post.PC1", "post.PC2", "post.PC3", "post.PC4",
"vas1_high_score", "vas_high_score_chg",
"vas1_score_dc", "vas_score_dc_chg",
"Post_PreTime")],
use = "pairwise.complete.obs")
postConsump.rcorr <- rcorr(as.matrix(pt.analytic.df[, c("postConsumpTimeToTest",
"demo_age", "demo_weight",
"demo_height", "BMI",
"thc.post", "min_constriction.post",
"slope.post", "AUC.post",
"thc_chg", "min_constriction_chg",
"AUC_chg",
"wpd.PC1", "wpd.PC2", "wpd.PC3",
"wpd.PC4", "wpd.PC5", "wpd.PC6",
"post.PC1", "post.PC2",
"post.PC3", "post.PC4",
"vas1_high_score",
"vas_high_score_chg",
"vas1_score_dc",
"vas_score_dc_chg",
"Post_PreTime")]))
postConsump.cor.df <- data.frame(postConsump_rho = postConsump.rcorr$r[, 1],
postConsump_p = postConsump.rcorr$P[, 1],
postConsump_n = postConsump.rcorr$n[, 1])
postConsump.cor.df <- postConsump.cor.df[order(-1*abs(postConsump.cor.df$postConsump_rho)), ]
postConsump.cor.df <- postConsump.cor.df[-1, ]
rm(postConsump.rcorr)
knitr::kable(postConsump.cor.df)
| postConsump_rho | postConsump_p | postConsump_n | |
|---|---|---|---|
| Post_PreTime | 0.3894282 | 0.0032958 | 55 |
| min_constriction.post | -0.2167006 | 0.1120314 | 55 |
| post.PC2 | 0.2105140 | 0.1229040 | 55 |
| wpd.PC3 | 0.2014399 | 0.1402745 | 55 |
| vas1_high_score | 0.1823385 | 0.1827289 | 55 |
| vas_high_score_chg | 0.1814208 | 0.1849790 | 55 |
| AUC_chg | 0.1676010 | 0.2212969 | 55 |
| wpd.PC6 | 0.1653819 | 0.2275616 | 55 |
| min_constriction_chg | 0.1476973 | 0.2818785 | 55 |
| post.PC1 | -0.1419379 | 0.3012731 | 55 |
| wpd.PC5 | -0.1410364 | 0.3043854 | 55 |
| vas_score_dc_chg | -0.1409231 | 0.3047780 | 55 |
| vas1_score_dc | -0.1393192 | 0.3103704 | 55 |
| wpd.PC1 | 0.1380900 | 0.3147006 | 55 |
| slope.post | 0.1323212 | 0.3355358 | 55 |
| BMI | -0.1285025 | 0.3497909 | 55 |
| post.PC3 | -0.1163836 | 0.3974525 | 55 |
| thc.post | -0.1124720 | 0.4180944 | 54 |
| thc_chg | -0.1094518 | 0.4307816 | 54 |
| wpd.PC2 | 0.1082799 | 0.4313412 | 55 |
| demo_weight | -0.1043995 | 0.4481278 | 55 |
| AUC.post | -0.0775811 | 0.5734411 | 55 |
| demo_age | 0.0601910 | 0.6624510 | 55 |
| wpd.PC4 | -0.0401901 | 0.7707995 | 55 |
| demo_height | -0.0133431 | 0.9229754 | 55 |
| post.PC4 | 0.0121542 | 0.9298203 | 55 |
ggplot(data = pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], aes(x = user_cat, y = postConsumpTimeToTest, col = user_cat))+
geom_boxplot()+
theme_bw()
ggplot(data = pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], aes(x = demo_gender, y = postConsumpTimeToTest, col = demo_gender))+
geom_boxplot()+
theme_bw()
ggplot(data = pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], aes(x = postConsumpTimeToTest, y = Post_PreTime, col = user_cat))+
geom_point()+
theme_bw()
ggplot(data = pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], aes(x = postConsumpTimeToTest, y = min_constriction.post, col = user_cat))+
geom_point()+
theme_bw()
ggplot(data = pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], aes(x = postConsumpTimeToTest, y = post.PC2, col = user_cat))+
geom_point()+
theme_bw()
ggplot(data = pt.analytic.df[pt.analytic.df$user_cat != "non-user", ], aes(x = postConsumpTimeToTest, y = wpd.PC3, col = user_cat))+
geom_point()+
theme_bw()
\[ \begin{aligned} Y_{i}(t) &= f_0(t) + f_1(t)*I(user = occasional) + f_2(t)*I(user = daily) +f_3(t)*I(user = occasional)*PCTT + f_4(t)*I(user = daily)*PCTT + b_i(t) + \epsilon_i(t)\\ & b_i(t) \sim GP(0, \Sigma_b) \\ & \epsilon_i(t) \sim N(0, \sigma_{\epsilon}^2) \\ &= \sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)*I(user = occasional) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i)*I(user = daily)\\ &+ \sum_{k=1}^K \xi_{3k}\phi_{3k}(t_i)*I(user = occasional)*PCTT + \sum_{k=1}^K \xi_{4k}\phi_{4k}(t_i)*I(user = daily)*PCTT + \sum_{k=1}^K \xi_{ik}\phi_k(t_i) + \epsilon_i(t)\\ \end{aligned} \]
pt.analytic.df$postConsumpTimeToTest_c <- pt.analytic.df$postConsumpTimeToTest- round(mean(pt.analytic.df$postConsumpTimeToTest, na.rm = T), 3)
summary(pt.analytic.df$postConsumpTimeToTest_c)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -9.218000 -3.218000 -0.218000 0.000182 2.782000 21.782000 29
by(data = pt.analytic.df$postConsumpTimeToTest_c, INDICES = pt.analytic.df$user_cat, FUN = summary)
## pt.analytic.df$user_cat: non-user
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## NA NA NA NaN NA NA 29
## ------------------------------------------------------------
## pt.analytic.df$user_cat: occasional
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.218 -0.968 1.282 1.715 4.782 21.782
## ------------------------------------------------------------
## pt.analytic.df$user_cat: daily
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.218 -4.218 -2.218 -2.058 -0.218 4.782
pt.analytic.df$occ_PCTT <- pt.analytic.df$occasional*pt.analytic.df$postConsumpTimeToTest_c
pt.analytic.df$dly_PCTT <- pt.analytic.df$daily*pt.analytic.df$postConsumpTimeToTest_c
pt.analytic.df$occ_PCTT[pt.analytic.df$user == "non-user"] <- 0
pt.analytic.df$dly_PCTT[pt.analytic.df$user == "non-user"] <- 0
post_intPCTT.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
# right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
post_intPCTT.gam.df$sind <- post_intPCTT.gam.df$frame/400
# head(right_0.gam.df)
m.post_intPCTT_gam <- mgcv::bam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr") +
s(sind, by=occ_PCTT, k=30, bs = "cr") +
s(sind, by=dly_PCTT, k=30, bs = "cr"),
data = post_intPCTT.gam.df, method = "REML")
summary(m.post_intPCTT_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(sind, by = occ_PCTT, k = 30, bs = "cr") + s(sind, by = dly_PCTT,
## k = 30, bs = "cr")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.25464 0.07029 -174.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 22.077 25.48 201.94 <2e-16 ***
## s(sind):occasional 10.206 12.42 47.48 <2e-16 ***
## s(sind):daily 9.719 11.83 24.03 <2e-16 ***
## s(sind):occ_PCTT 11.555 14.08 65.76 <2e-16 ***
## s(sind):dly_PCTT 8.630 10.51 12.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.267 Deviance explained = 26.8%
## -REML = 1.1209e+05 Scale est. = 55.955 n = 32642
## Create a matrix of residuals
post_intPCTT_gam.resid <- cbind(post_intPCTT.gam.df[!(is.na(post_intPCTT.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_intPCTT_gam$residuals)
names(post_intPCTT_gam.resid) <- c("subject_id", "frame", "resid")
# post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_intPCTT_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_intPCTT_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
post_intPCTT.gam.df <- merge(post_intPCTT.gam.df, Phi_mat,
by = "frame",
all.x = T)
post_intPCTT.gam.df <- post_intPCTT.gam.df[order(post_intPCTT.gam.df$subject_id, post_intPCTT.gam.df$frame), ]
post_intPCTT.gam.df$subject_id <- as.factor(post_intPCTT.gam.df$subject_id)
post_intPCTT_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr") +
s(sind, by=occ_PCTT, k=30, bs = "cr") +
s(sind, by=dly_PCTT, k=30, bs = "cr")+
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = post_intPCTT.gam.df, discrete = TRUE)
summary(post_intPCTT_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(sind, by = occ_PCTT, k = 30, bs = "cr") + s(sind, by = dly_PCTT,
## k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.1111 0.9229 -10.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.53 28.42 119.05 <2e-16 ***
## s(sind):occasional 18.76 22.06 77.86 <2e-16 ***
## s(sind):daily 18.16 21.43 38.47 <2e-16 ***
## s(sind):occ_PCTT 21.94 25.24 28.26 <2e-16 ***
## s(sind):dly_PCTT 19.38 22.77 17.82 <2e-16 ***
## s(subject_id):Phi1 81.24 84.00 22076.91 <2e-16 ***
## s(subject_id):Phi2 80.29 84.00 2495.67 <2e-16 ***
## s(subject_id):Phi3 81.38 84.00 787.75 <2e-16 ***
## s(subject_id):Phi4 79.63 84.00 1027.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.967 Deviance explained = 96.7%
## fREML = 62846 Scale est. = 2.5436 n = 32642
post_intPCTT_gam.coef <- post_intPCTT_gam.fri$coefficients
post_intPCTT_gam.cov <- vcov.gam(post_intPCTT_gam.fri)
df_pred_non <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 0, "daily" = 0, "occ_PCTT" = 0, "dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_non <- predict(post_intPCTT_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_occ <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 1, "daily" = 0, "occ_PCTT" = 0, "dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_occ <- predict(post_intPCTT_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")
df_pred_occP10 <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 1, "daily" = 0, "occ_PCTT" = 10, "dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_occP10 <- predict(post_intPCTT_gam.fri, newdata=df_pred_occP10, se.fit=TRUE, type = "lpmatrix")
df_pred_occM10 <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 1, "daily" = 0, "occ_PCTT" = -10, "dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_occM10 <- predict(post_intPCTT_gam.fri, newdata=df_pred_occM10, se.fit=TRUE, type = "lpmatrix")
# df_pred_dly <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 1,
# "Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
# "Phi4" = 0,
# "subject_id" = right_0.gam.df$subject_id[1])
#
# lpmat_dly <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 4)/30,
user = c(rep("non-user", 401), rep("occasional", 401), rep("occ+10", 401), rep("occ-10", 401)
# rep("daily", 401)
),
mean = c(lpmat_non %*% post_intPCTT_gam.coef,
lpmat_occ %*% post_intPCTT_gam.coef,
lpmat_occP10 %*% post_intPCTT_gam.coef,
lpmat_occM10 %*% post_intPCTT_gam.coef
# lpmat_dly %*% post_gam.coef
),
se = c(sqrt(diag(lpmat_non %*% post_intPCTT_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_occ %*% post_intPCTT_gam.cov %*% t(lpmat_occ))),
sqrt(diag(lpmat_occP10 %*% post_intPCTT_gam.cov %*% t(lpmat_occP10))),
sqrt(diag(lpmat_occM10 %*% post_intPCTT_gam.cov %*% t(lpmat_occM10)))
# sqrt(diag(lpmat_dly %*% post_gam.cov %*% t(lpmat_dly)))
),
stringsAsFactors = F)
post_userProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent Change")+
theme_bw()
legend_userProfile <- g_legend(post_userProfile)
post_userProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
labs(y = "")+
theme_bw()
post_userTraj <- grid.arrange(arrangeGrob(post_userProfile+theme(legend.position = "none"),
post_userProfile.se+theme(legend.position = "none"), nrow = 1),
legend_userProfile, nrow = 1,
widths = c(30, 6))
# post_intPCTT_gam.coef <- post_intPCTT_gam.fri$coefficients
# post_intPCTT_gam.cov <- vcov.gam(post_intPCTT_gam.fri)
#
# df_pred_non <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 0, "daily" = 0, "occ_PCTT" = 0, "dly_PCTT" = 0,
# "Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
# "subject_id" = post_intPCTT.gam.df$subject_id[1])
#
# lpmat_non <- predict(post_intPCTT_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_dly <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 0, "daily" = 1, "occ_PCTT" = 0, "dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_dly <- predict(post_intPCTT_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
df_pred_dlyP10 <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 0, "daily" = 1, "occ_PCTT" = 0, "dly_PCTT" = 10,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_dlyP10 <- predict(post_intPCTT_gam.fri, newdata=df_pred_dlyP10, se.fit=TRUE, type = "lpmatrix")
df_pred_dlyM10 <- data.frame("sind" = unique(post_intPCTT.gam.df$sind), "occasional" = 0, "daily" = 1, "occ_PCTT" = 0, "dly_PCTT" = -10,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
lpmat_dlyM10 <- predict(post_intPCTT_gam.fri, newdata=df_pred_dlyM10, se.fit=TRUE, type = "lpmatrix")
# df_pred_dly <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 1,
# "Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
# "Phi4" = 0,
# "subject_id" = right_0.gam.df$subject_id[1])
#
# lpmat_dly <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 4)/30,
user = c(rep("non-user", 401), rep("daily", 401), rep("dly+10", 401), rep("dly-10", 401)
# rep("daily", 401)
),
mean = c(lpmat_non %*% post_intPCTT_gam.coef,
lpmat_dly %*% post_intPCTT_gam.coef,
lpmat_dlyP10 %*% post_intPCTT_gam.coef,
lpmat_dlyM10 %*% post_intPCTT_gam.coef
# lpmat_dly %*% post_gam.coef
),
se = c(sqrt(diag(lpmat_non %*% post_intPCTT_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_dly %*% post_intPCTT_gam.cov %*% t(lpmat_dly))),
sqrt(diag(lpmat_dlyP10 %*% post_intPCTT_gam.cov %*% t(lpmat_dlyP10))),
sqrt(diag(lpmat_dlyM10 %*% post_intPCTT_gam.cov %*% t(lpmat_dlyM10)))
# sqrt(diag(lpmat_dly %*% post_gam.cov %*% t(lpmat_dly)))
),
stringsAsFactors = F)
post_userProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent Change")+
theme_bw()
legend_userProfile <- g_legend(post_userProfile)
post_userProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
labs(y = "")+
theme_bw()
post_userTraj <- grid.arrange(arrangeGrob(post_userProfile+theme(legend.position = "none"),
post_userProfile.se+theme(legend.position = "none"), nrow = 1),
legend_userProfile, nrow = 1,
widths = c(30, 6))
df_pred_occInt <- data.frame("sind" = unique(post_intPCTT.gam.df$sind),
"occasional" = 0, "daily" = 0,
"occ_PCTT" = 10, "dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT.gam.df$subject_id[1])
# lpmat_occInt <- predict(post_intPCTT_gam.fri, newdata=df_pred_occInt, se.fit=TRUE, type = "lpmatrix")
pred_f1 <- predict(post_intPCTT_gam.fri, newdata=df_pred_occInt, se.fit=TRUE, type = "terms")
# pred_f2 <- predict(post_intPCTT_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(seconds = seq(0, 400)/30,
# f0_hat = lpmat_non %*% post_gam.coef,
# f0_se = sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 4],
f1_se = pred_f1$se.fit[, 4]
# ,
# f2_hat = pred_f2$fit[, 3],
# f2_se = pred_f2$se.fit[, 3]
)
# post_non_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f0_hat))+
# geom_line()+
# geom_line(aes(x = seconds, y = f0_hat + 2*f0_se), linetype = "longdash")+
# geom_line(aes(x = seconds, y = f0_hat - 2*f0_se), linetype = "longdash")+
# labs(title = "Average Percent Change of a Non-user (Post)", ylab = "Percent Change")+
# theme_bw()
post_occ_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f1_hat))+
geom_line()+
geom_line(aes(x = seconds, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Occasional Interaction (Post)",
y= "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
# post_dly_plt <- ggplot(data = pred_coef_df, aes(x=seconds, y = f2_hat))+
# geom_line()+
# geom_line(aes(x = seconds, y = f2_hat + 2*f2_se), linetype = "longdash")+
# geom_line(aes(x = seconds, y = f2_hat - 2*f2_se), linetype = "longdash")+
# geom_line(aes(x = seconds, y = 0), col = "darkred")+
# labs(title = "Difference in Percent Change: Daily and Non-user (Post)",
# y = "")+
# theme_bw()+
# scale_x_continuous(expand = c(0, 0))+
# theme(rect = element_rect(fill = "transparent"))
# ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
# gp = gpar(fontsize = 12))
# posval <- textGrob(label = sapply(strwrap("Less Constriction in User", width = 15, simplify = F),
# paste, collapse = "\n"), rot = 0,
# gp = gpar(fontsize = 8))
# negval <- textGrob(label = sapply(strwrap("More Constriction in User", width = 15, simplify = F),
# paste, collapse = "\n"), rot = 0,
# gp = gpar(fontsize = 8))
#
# post_occ_coef <- grid.arrange(ylab, posval, negval, post_occ_plt,
# ncol = 2, nrow = 2,
# layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
#
# post_dly_coef <- grid.arrange(ylab, posval, negval, post_dly_plt,
# ncol = 2, nrow = 2,
# layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
post_occ_plt
Plot difference between daily and occasional user
f2_f1 <- (lpmat_dly - lpmat_occ) %*% post_gam.coef
f2_f1.se <- sqrt(diag((lpmat_dly - lpmat_occ) %*% post_gam.cov %*% t((lpmat_dly - lpmat_occ))))
f2_f1_diff_df <- data.frame(seconds = seq(0, 400)/30,
f2f1_diff = f2_f1,
f2f1_se = f2_f1.se)
ggplot(data = f2_f1_diff_df, aes(x=seconds, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = seconds, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
labs(title = "Difference between Average Daily vs Occasional User", ylab = "f2_hat - f1_hat")+
theme_bw()
post_dlyocc_plt <- ggplot(data = f2_f1_diff_df, aes(x=seconds, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = seconds, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = seconds, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Daily vs Occasional User",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in Daily User", width = 15,
simplify = F),
paste, collapse = "\n"), rot = 0, just = "bottom",
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in Daily User", width = 15,
simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_dlyocc_coef <- grid.arrange(ylab, posval, negval, post_dlyocc_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)),
c(1, 3, rep(4, 15))))
post_dlyocc_coef
pt.analytic.df$smoker <- ifelse(pt.analytic.df$user_cat == "non-user", 0, 1)
pt.analytic.df$smoke_PCTT <- pt.analytic.df$smoker*pt.analytic.df$postConsumpTimeToTest_c
pt.analytic.df$smoke_PCTT[pt.analytic.df$user == "non-user"] <- 0
by(data = pt.analytic.df$postConsumpTimeToTest_c, INDICES = pt.analytic.df$smoker, FUN = summary)
## pt.analytic.df$smoker: 0
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## NA NA NA NaN NA NA 29
## ------------------------------------------------------------
## pt.analytic.df$smoker: 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.218000 -3.218000 -0.218000 0.000182 2.782000 21.782000
post_intPCTT_smoke.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat", "eye"))
# right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
post_intPCTT_smoke.gam.df$sind <- post_intPCTT_smoke.gam.df$frame/400
# head(right_0.gam.df)
m.post_intPCTT_smoke_gam <- mgcv::bam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr") +
s(sind, by=smoke_PCTT, k=30, bs = "cr"),
data = post_intPCTT_smoke.gam.df, method = "REML")
summary(m.post_intPCTT_smoke_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker,
## k = 30, bs = "cr") + s(sind, by = smoke_PCTT, k = 30, bs = "cr")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.26266 0.07114 -172.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 21.930 25.33 197.05 <2e-16 ***
## s(sind):smoker 10.136 12.31 29.67 <2e-16 ***
## s(sind):smoke_PCTT 9.679 11.80 28.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.249 Deviance explained = 25%
## -REML = 1.1246e+05 Scale est. = 57.308 n = 32642
## Create a matrix of residuals
post_intPCTT_smoke_gam.resid <- cbind(post_intPCTT_smoke.gam.df[!(is.na(post_intPCTT_smoke.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_intPCTT_smoke_gam$residuals)
names(post_intPCTT_smoke_gam.resid) <- c("subject_id", "frame", "resid")
# post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_intPCTT_smoke_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_intPCTT_smoke_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_intPCTT_smoke_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_intPCTT_smoke_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
post_intPCTT_smoke.gam.df <- merge(post_intPCTT_smoke.gam.df, Phi_mat,
by = "frame",
all.x = T)
post_intPCTT_smoke.gam.df <- post_intPCTT_smoke.gam.df[order(post_intPCTT_smoke.gam.df$subject_id,
post_intPCTT_smoke.gam.df$frame), ]
post_intPCTT_smoke.gam.df$subject_id <- as.factor(post_intPCTT_smoke.gam.df$subject_id)
post_intPCTT_smoke.gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr") +
s(sind, by=smoke_PCTT, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = post_intPCTT_smoke.gam.df, discrete = TRUE)
summary(post_intPCTT_smoke.gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker,
## k = 30, bs = "cr") + s(sind, by = smoke_PCTT, k = 30, bs = "cr") +
## s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2,
## bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id,
## by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3191 0.9644 -10.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.31 28.28 119.67 <2e-16 ***
## s(sind):smoker 19.50 22.79 68.66 <2e-16 ***
## s(sind):smoke_PCTT 19.44 22.89 21.21 <2e-16 ***
## s(subject_id):Phi1 82.17 84.00 23297.74 <2e-16 ***
## s(subject_id):Phi2 81.60 84.00 2639.64 <2e-16 ***
## s(subject_id):Phi3 82.24 84.00 792.85 <2e-16 ***
## s(subject_id):Phi4 81.01 84.00 1141.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.966 Deviance explained = 96.6%
## fREML = 63220 Scale est. = 2.6117 n = 32642
# create plot categories based on the 25th, 50th and 75th percentile
plot_cat <- c(59, 62, 65) - round(mean(pt.analytic.df$postConsumpTimeToTest, na.rm = T), 3)
post_intPCTT_smoke_gam.coef <- post_intPCTT_smoke.gam.fri$coefficients
post_intPCTT_smoke_gam.cov <- vcov.gam(post_intPCTT_smoke.gam.fri)
### -- Non-smoker
df_pred_non <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"smoker" = 0,
"smoke_PCTT" = 0,
"dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_non <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
### -- Smoker with median PCTT
df_pred_smoke50 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"smoker" = 1,
"smoke_PCTT" = plot_cat[2],
"dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_smoke50 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke50, se.fit=TRUE, type = "lpmatrix")
### -- Smoker with 25th%-ile PCTT
df_pred_smoke25 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"smoker" = 1,
"smoke_PCTT" = plot_cat[1],
"dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_smoke25 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke25, se.fit=TRUE, type = "lpmatrix")
### -- Smoker with 75th%-ile PCTT
df_pred_smoke75 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"smoker" = 1,
"smoke_PCTT" = plot_cat[3],
"dly_PCTT" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_smoke75 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke75, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 4)/30,
user = c(rep("non-smoker", 401),
rep("smoker-62", 401),
rep("smoker-59", 401),
rep("smoker-65", 401)
),
mean = c(lpmat_non %*% post_intPCTT_smoke_gam.coef,
lpmat_smoke50 %*% post_intPCTT_smoke_gam.coef,
lpmat_smoke25 %*% post_intPCTT_smoke_gam.coef,
lpmat_smoke75 %*% post_intPCTT_smoke_gam.coef
),
se = c(sqrt(diag(lpmat_non %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_smoke50 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke50))),
sqrt(diag(lpmat_smoke25 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke25))),
sqrt(diag(lpmat_smoke75 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke75)))
),
stringsAsFactors = F)
post_smokerProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent Change")+
theme_bw()
legend_smokerProfile <- g_legend(post_smokerProfile)
post_smokerProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
labs(y = "")+
theme_bw()
post_smokerTraj <- grid.arrange(arrangeGrob(post_smokerProfile+theme(legend.position = "none"),
post_smokerProfile.se+theme(legend.position = "none"), nrow = 1),
legend_smokerProfile, nrow = 1,
widths = c(30, 6))
### Subset to only smokers and remove previously calculated "Phi" values
post_intPCTT_smoke.gam.df <- post_intPCTT_smoke.gam.df[post_intPCTT_smoke.gam.df$smoker == 1, ]
keep.vars <- names(post_intPCTT_smoke.gam.df)[grepl("Phi", names(post_intPCTT_smoke.gam.df)) == FALSE]
post_intPCTT_smoke.gam.df <- post_intPCTT_smoke.gam.df[, keep.vars]
post_intPCTT_smoke.gam.df <- post_intPCTT_smoke.gam.df[!(is.na(post_intPCTT_smoke.gam.df$sind)), ]
# head(right_0.gam.df)
m.post_intPCTT_smoke_gam <- mgcv::bam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=postConsumpTimeToTest_c, k=30, bs = "cr"),
data = post_intPCTT_smoke.gam.df, method = "REML")
summary(m.post_intPCTT_smoke_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = postConsumpTimeToTest_c,
## k = 30, bs = "cr")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.4609 0.0436 -262.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 21.88 25.37 313.50 <2e-16 ***
## s(sind):postConsumpTimeToTest_c 10.98 13.38 35.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.284 Deviance explained = 28.5%
## -REML = 69670 Scale est. = 40.446 n = 21295
## Create a matrix of residuals
post_intPCTT_smoke_gam.resid <- cbind(post_intPCTT_smoke.gam.df[!(is.na(post_intPCTT_smoke.gam.df$percent_change)),
c("subject_id", "frame")],
m.post_intPCTT_smoke_gam$residuals)
names(post_intPCTT_smoke_gam.resid) <- c("subject_id", "frame", "resid")
# post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_intPCTT_smoke_gam.resid[, c("subject_id", "frame", "resid")],
timevar = "frame",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
# resid_names <- names(resid_mat)
# resid_names <- sort(resid_names)
# resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_intPCTT_smoke_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_intPCTT_smoke_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_intPCTT_smoke_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
post_intPCTT_smoke.gam.df <- merge(post_intPCTT_smoke.gam.df, Phi_mat,
by = "frame",
all.x = T)
post_intPCTT_smoke.gam.df <- post_intPCTT_smoke.gam.df[order(post_intPCTT_smoke.gam.df$subject_id,
post_intPCTT_smoke.gam.df$frame), ]
post_intPCTT_smoke.gam.df$subject_id <- as.factor(post_intPCTT_smoke.gam.df$subject_id)
post_intPCTT_smoke.gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=postConsumpTimeToTest_c, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = post_intPCTT_smoke.gam.df, discrete = TRUE)
summary(post_intPCTT_smoke.gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = postConsumpTimeToTest_c,
## k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.777 0.730 -13.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.42 28.51 75.94 <2e-16 ***
## s(sind):postConsumpTimeToTest_c 18.26 21.66 21.07 <2e-16 ***
## s(subject_id):Phi1 53.74 55.00 18806.30 <2e-16 ***
## s(subject_id):Phi2 53.48 55.00 5056.00 <2e-16 ***
## s(subject_id):Phi3 53.52 55.00 652.30 <2e-16 ***
## s(subject_id):Phi4 53.49 55.00 570.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.5%
## fREML = 38357 Scale est. = 1.9878 n = 21295
# create plot categories based on the 25th, 50th and 75th percentile
post_intPCTT_smoke_gam.coef <- post_intPCTT_smoke.gam.fri$coefficients
post_intPCTT_smoke_gam.cov <- vcov.gam(post_intPCTT_smoke.gam.fri)
### -- Smoker with median PCTT
df_pred_smoke50 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"postConsumpTimeToTest_c" = plot_cat[2],
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_smoke50 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke50, se.fit=TRUE, type = "lpmatrix")
### -- Smoker with 25th%-ile PCTT
df_pred_smoke25 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"postConsumpTimeToTest_c" = plot_cat[1],
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_smoke25 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke25, se.fit=TRUE, type = "lpmatrix")
### -- Smoker with 75th%-ile PCTT
df_pred_smoke75 <- data.frame("sind" = unique(post_intPCTT_smoke.gam.df$sind),
"postConsumpTimeToTest_c" = plot_cat[3],
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0, "Phi4" = 0,
"subject_id" = post_intPCTT_smoke.gam.df$subject_id[1])
lpmat_smoke75 <- predict(post_intPCTT_smoke.gam.fri, newdata=df_pred_smoke75, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(seconds = rep(seq(0, 400), 3)/30,
user = c(rep("smoker-62", 401),
rep("smoker-59", 401),
rep("smoker-65", 401)
),
mean = c(lpmat_smoke50 %*% post_intPCTT_smoke_gam.coef,
lpmat_smoke25 %*% post_intPCTT_smoke_gam.coef,
lpmat_smoke75 %*% post_intPCTT_smoke_gam.coef
),
se = c(sqrt(diag(lpmat_smoke50 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke50))),
sqrt(diag(lpmat_smoke25 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke25))),
sqrt(diag(lpmat_smoke75 %*% post_intPCTT_smoke_gam.cov %*% t(lpmat_smoke75)))
),
stringsAsFactors = F)
post_smokerProfile <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
labs(y = "Percent Change")+
theme_bw()
legend_smokerProfile <- g_legend(post_smokerProfile)
post_smokerProfile.se <- ggplot(data = pred_df, aes(x = seconds, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=seconds, y = mean + 2*se, group = user, col = user),
linetype = "longdash")+
geom_line(aes(x=seconds, y = mean - 2*se, group = user, col = user),
linetype = "longdash")+
labs(y = "")+
theme_bw()
post_OnlysmokerTraj <- grid.arrange(arrangeGrob(post_smokerProfile+theme(legend.position = "none"),
post_smokerProfile.se+theme(legend.position = "none"), nrow = 1),
legend_smokerProfile, nrow = 1,
widths = c(30, 6))
4.1 Comments for Ben